diff --git a/java/yb-pgsql/src/test/java/org/yb/pgsql/BasePgSQLTest.java b/java/yb-pgsql/src/test/java/org/yb/pgsql/BasePgSQLTest.java index e49c5515883f..c28ece0f85c7 100644 --- a/java/yb-pgsql/src/test/java/org/yb/pgsql/BasePgSQLTest.java +++ b/java/yb-pgsql/src/test/java/org/yb/pgsql/BasePgSQLTest.java @@ -44,6 +44,8 @@ import java.util.List; import java.util.Map; import java.util.Properties; +import java.util.regex.Matcher; +import java.util.regex.Pattern; import java.util.Scanner; import java.util.Set; import java.util.TreeMap; @@ -2132,6 +2134,22 @@ protected String getExplainAnalyzeOutput(Statement stmt, String query) throws Ex } } + static final Pattern roundtrips_pattern = Pattern.compile("Storage Read Requests: (\\d+)\\s*$"); + + protected Long getNumStorageRoundtrips(Statement stmt, String query) throws Exception { + try (ResultSet rs = stmt.executeQuery( + "EXPLAIN (ANALYZE, DIST, COSTS OFF, TIMING OFF) " + query)) { + while (rs.next()) { + String line = rs.getString(1); + Matcher m = roundtrips_pattern.matcher(line); + if (m.find()) { + return Long.parseLong(m.group(1)); + } + } + } + return null; + } + protected Long getNumDocdbRequests(Statement stmt, String query) throws Exception { // Executing query once just in case if master catalog cache is not refreshed stmt.execute(query); diff --git a/java/yb-pgsql/src/test/java/org/yb/pgsql/TestPgRegressHashInQueries.java b/java/yb-pgsql/src/test/java/org/yb/pgsql/TestPgRegressHashInQueries.java new file mode 100644 index 000000000000..2c21335e4e42 --- /dev/null +++ b/java/yb-pgsql/src/test/java/org/yb/pgsql/TestPgRegressHashInQueries.java @@ -0,0 +1,187 @@ +// Copyright (c) YugaByte, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +// in compliance with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations +// under the License. +// + +package org.yb.pgsql; + +import org.junit.Test; +import org.junit.runner.RunWith; + +import org.yb.util.YBTestRunnerNonTsanOnly; +import org.yb.util.RegexMatcher; + +import java.sql.Connection; +import java.sql.Statement; + +import java.util.HashSet; +import java.util.Set; + +import static org.yb.AssertionWrappers.*; + +@RunWith(value=YBTestRunnerNonTsanOnly.class) +public class TestPgRegressHashInQueries extends BasePgSQLTest { + + @Override + protected Integer getYsqlRequestLimit() { + // This is so number of roundtrips equals the number of request operators created. + return 1; + } + + @Test + public void testInQueryBatchingOnHashKey() throws Exception { + + String createTable = "CREATE TABLE t1 (a int PRIMARY KEY, b int) SPLIT INTO 3 TABLETS"; + String insertTable = "INSERT INTO t1 SELECT i, i FROM (SELECT generate_series(1, 1024) i) t"; + + try (Statement statement = connection.createStatement()) { + statement.execute(createTable); + statement.execute(insertTable); + } + + // Generate select query required to run batched IN. + // SELECT * FROM t1 WHERE a IN (1, 2, 3, .... 
511, 512); + int num_rows = 512; + String query = "SELECT * FROM t1 WHERE a IN ("; + for(int i = 1; i < num_rows; ++i) { + query += i + ", "; + } + query += num_rows + ")"; + Set expectedRows = new HashSet<>(); + for (int i = 1; i <= num_rows; i++) { + expectedRows.add(new Row(i, i)); + } + + try (Statement statement = connection.createStatement()) { + statement.execute("SET yb_enable_hash_batch_in = false"); + long noBatchingNumRequests = getNumStorageRoundtrips(statement, query); + assertEquals(512, noBatchingNumRequests); + assertRowSet(statement, query, expectedRows); + + statement.execute("SET yb_enable_hash_batch_in = true"); + long batchingNumRequests = getNumStorageRoundtrips(statement, query); + assertRowSet(statement, query, expectedRows); + // We send three requests as the number of tablets created are three. + assertEquals(3, batchingNumRequests); + } + } + + @Test + public void testInQueryBatchingOnMixedKey() throws Exception { + + String createTable = + "CREATE TABLE t1 (a int, b int, PRIMARY KEY(a hash, b asc)) SPLIT INTO 3 TABLETS"; + String insertTable1 = + "INSERT INTO t1 SELECT i, i FROM (SELECT generate_series(1, 1024) i) t"; + String insertTable2 = + "INSERT INTO t1 SELECT i, i+1 FROM (SELECT generate_series(1, 1024) i) t"; + + try (Statement statement = connection.createStatement()) { + statement.execute(createTable); + statement.execute(insertTable1); + statement.execute(insertTable2); + } + + // Generate select query required to run batched IN. + // SELECT * FROM t1 WHERE a IN (1, 2, 3, .... 
511, 512); + int upper_limit = 512; + String query = "SELECT * FROM t1 WHERE a IN ("; + for(int i = 1; i < upper_limit; ++i) { + query += i + ", "; + } + query += upper_limit + ") AND b IN ("; + + for(int i = 1; i < upper_limit; ++i) { + if ((i % 2) == 0) { + query += i + ", "; + } + } + query += upper_limit + ")"; + + Set expectedRows = new HashSet<>(); + for (int i = 1; i <= upper_limit; i++) { + if ((i % 2) == 1) { + expectedRows.add(new Row(i, i+1)); + } else { + expectedRows.add(new Row(i, i)); + } + } + + try (Statement statement = connection.createStatement()) { + statement.execute("SET yb_enable_hash_batch_in = false"); + long noBatchingNumRequests = getNumStorageRoundtrips(statement, query); + assertEquals(512, noBatchingNumRequests); + assertRowSet(statement, query, expectedRows); + + statement.execute("SET yb_enable_hash_batch_in = true"); + long batchingNumRequests = getNumStorageRoundtrips(statement, query); + assertRowSet(statement, query, expectedRows); + // We send three requests as the number of tablets created are three. + assertEquals(3, batchingNumRequests); + } + } + + @Test + public void testInQueryBatchingNestLoopHashKey() throws Exception { + String createTable1 = "CREATE TABLE x (a int PRIMARY KEY, b int) SPLIT INTO 3 TABLETS"; + String insertTable1 = "INSERT INTO x SELECT i*2, i FROM (SELECT generate_series(1, 4096) i) t"; + String createTable2 = "CREATE TABLE y (a int PRIMARY KEY, b int) SPLIT INTO 3 TABLETS"; + String insertTable2 = "INSERT INTO y SELECT i*5, i FROM (SELECT generate_series(1, 4096) i) t"; + + try (Statement statement = connection.createStatement()) { + statement.execute(createTable1); + statement.execute(insertTable1); + statement.execute(createTable2); + statement.execute(insertTable2); + } + + // Generate NL Join query and enable NL Join batching in it with different batch sizes. + // These get automatically converted to batched IN queries. We should expect the best + // performance when we enable IN batching. 
+ String query = "SELECT * FROM x t1 JOIN y t2 ON t1.a = t2.a"; + + Set expectedRows = new HashSet<>(); + for (int i = 1; i <= 819; i++) { + expectedRows.add(new Row(i*10, i*5, i*10, i*2)); + } + + try (Statement statement = connection.createStatement()) { + // Enabling NL Join batching + statement.execute("SET enable_hashjoin = off"); + statement.execute("SET enable_mergejoin = off"); + statement.execute("SET enable_seqscan = off"); + statement.execute("SET enable_material = off"); + + statement.execute("SET yb_bnl_batch_size = 3;"); + statement.execute("SET yb_enable_hash_batch_in = false"); + long noBatchingSmallBatchSizeNumRPCs = getNumStorageRoundtrips(statement, query); + assertEquals(4102, noBatchingSmallBatchSizeNumRPCs); + assertRowSet(statement, query, expectedRows); + + statement.execute("SET yb_bnl_batch_size = 1024;"); + statement.execute("SET yb_enable_hash_batch_in = false"); + long noBatchingLargeBatchSizeNumRPCs = getNumStorageRoundtrips(statement, query); + assertEquals(4102, noBatchingLargeBatchSizeNumRPCs); + assertRowSet(statement, query, expectedRows); + + statement.execute("SET yb_bnl_batch_size = 1024;"); + statement.execute("SET yb_enable_hash_batch_in = true"); + long batchingLargeBatchSizeNumRPCs = getNumStorageRoundtrips(statement, query); + assertEquals(12, batchingLargeBatchSizeNumRPCs); + assertRowSet(statement, query, expectedRows); + } + } + + @Test + public void schedule() throws Exception { + runPgRegressTest("yb_hash_in_schedule"); + } +} diff --git a/src/postgres/contrib/postgres_fdw/expected/yb_pg_postgres_fdw.out b/src/postgres/contrib/postgres_fdw/expected/yb_pg_postgres_fdw.out index e278b259c6f0..2266b848fb89 100644 --- a/src/postgres/contrib/postgres_fdw/expected/yb_pg_postgres_fdw.out +++ b/src/postgres/contrib/postgres_fdw/expected/yb_pg_postgres_fdw.out @@ -812,9 +812,9 @@ SELECT * FROM ft2 WHERE c1 = ANY (ARRAY(SELECT c1 FROM ft1 WHERE c1 < 5)); c1 | c2 | c3 | c4 | c5 | c6 | c7 | c8 
----+----+-------+------------------------------+--------------------------+----+------------+----- 1 | 1 | 00001 | Fri Jan 02 00:00:00 1970 PST | Fri Jan 02 00:00:00 1970 | 1 | 1 | foo + 4 | 4 | 00004 | Mon Jan 05 00:00:00 1970 PST | Mon Jan 05 00:00:00 1970 | 4 | 4 | foo 2 | 2 | 00002 | Sat Jan 03 00:00:00 1970 PST | Sat Jan 03 00:00:00 1970 | 2 | 2 | foo 3 | 3 | 00003 | Sun Jan 04 00:00:00 1970 PST | Sun Jan 04 00:00:00 1970 | 3 | 3 | foo - 4 | 4 | 00004 | Mon Jan 05 00:00:00 1970 PST | Mon Jan 05 00:00:00 1970 | 4 | 4 | foo (4 rows) -- we should not push order by clause with volatile expressions or unsafe diff --git a/src/postgres/src/backend/nodes/copyfuncs.c b/src/postgres/src/backend/nodes/copyfuncs.c index c47f25d2ad3b..52ca5fd0eb61 100644 --- a/src/postgres/src/backend/nodes/copyfuncs.c +++ b/src/postgres/src/backend/nodes/copyfuncs.c @@ -902,8 +902,11 @@ _copyYbBatchedNestLoop(const YbBatchedNestLoop *from) * copy remainder of node */ COPY_SCALAR_FIELD(num_hashClauseInfos); - COPY_POINTER_FIELD(hashClauseInfos, - from->num_hashClauseInfos * sizeof(YbBNLHashClauseInfo)); + + if (from->num_hashClauseInfos > 0) + COPY_POINTER_FIELD( + hashClauseInfos, + from->num_hashClauseInfos * sizeof(YbBNLHashClauseInfo)); for (int i = 0; i < from->num_hashClauseInfos; i++) { diff --git a/src/postgres/src/backend/utils/misc/guc.c b/src/postgres/src/backend/utils/misc/guc.c index 219a87cc5720..1de2b3cc2f73 100644 --- a/src/postgres/src/backend/utils/misc/guc.c +++ b/src/postgres/src/backend/utils/misc/guc.c @@ -2103,7 +2103,16 @@ static struct config_bool ConfigureNamesBool[] = true, NULL, NULL, NULL }, - + { + {"yb_enable_hash_batch_in", PGC_USERSET, QUERY_TUNING_METHOD, + gettext_noop("GUC variable that enables batching RPCs of generated for IN queries on hash " + "keys issued to the same tablets."), + NULL + }, + &yb_enable_hash_batch_in, + true, + NULL, NULL, NULL + }, { {"yb_bypass_cond_recheck", PGC_USERSET, QUERY_TUNING_METHOD, gettext_noop("If true then condition 
rechecking is bypassed at YSQL if the condition is bound to DocDB."), diff --git a/src/postgres/src/test/regress/expected/yb_create_index.out b/src/postgres/src/test/regress/expected/yb_create_index.out index 29229a37fc17..c24003d09fd9 100644 --- a/src/postgres/src/test/regress/expected/yb_create_index.out +++ b/src/postgres/src/test/regress/expected/yb_create_index.out @@ -1161,6 +1161,7 @@ SELECT * FROM test_method WHERE h2 = 258; 2 | 258 | 1 | 2 | 10 | 20 (1 row) +DROP TABLE test_method; -- Test more HASH key cases in PRIMARY KEY CREATE TABLE test_hash ( h1 int, h2 int, r1 int, r2 int, v1 int, v2 int); diff --git a/src/postgres/src/test/regress/expected/yb_hash_code.out b/src/postgres/src/test/regress/expected/yb_hash_code.out index d7ee07794080..b4e0221ec6cf 100644 --- a/src/postgres/src/test/regress/expected/yb_hash_code.out +++ b/src/postgres/src/test/regress/expected/yb_hash_code.out @@ -232,8 +232,7 @@ EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, ANALYZE) SELECT x, yb_hash_code(x) Sort Method: quicksort Memory: 25kB -> Index Scan using test_table_one_primary_pkey on test_table_one_primary (actual rows=3 loops=1) Index Cond: ((x = ANY ('{1,2,3,4}'::integer[])) AND (yb_hash_code(x) < 50000)) - Rows Removed by Index Recheck: 1 -(6 rows) +(5 rows) SELECT x, yb_hash_code(x) FROM test_table_one_primary WHERE x IN (1, 2, 3, 4) AND yb_hash_code(x) < 50000 ORDER BY x; x | yb_hash_code @@ -938,9 +937,8 @@ EXPLAIN (COSTS OFF, TIMING OFF, SUMMARY OFF, ANALYZE) SELECT v1, yb_hash_code(v4 ---------------------------------------------------------------------------------------------------------------------- Index Only Scan using test_index_only_scan_recheck_v4_v1_idx on test_index_only_scan_recheck (actual rows=2 loops=1) Index Cond: ((v4 = ANY ('{1,2,3}'::integer[])) AND (yb_hash_code(v4) < 50000)) - Rows Removed by Index Recheck: 1 Heap Fetches: 0 -(4 rows) +(3 rows) SELECT v1, yb_hash_code(v4) FROM test_index_only_scan_recheck WHERE v4 IN (1, 2, 3) AND yb_hash_code(v4) 
< 50000; v1 | yb_hash_code diff --git a/src/postgres/src/test/regress/expected/yb_hash_in_queries.out b/src/postgres/src/test/regress/expected/yb_hash_in_queries.out new file mode 100644 index 000000000000..e84543f98cd6 --- /dev/null +++ b/src/postgres/src/test/regress/expected/yb_hash_in_queries.out @@ -0,0 +1,886 @@ +-- Testing IN queries on hash keys +set yb_enable_hash_batch_in = false; +CREATE TABLE test_method (h1 int, a int, b int, c int, d int, e int, PRIMARY KEY (h1 HASH)); +INSERT INTO test_method VALUES (1, 1, 1, 1, 1, 11), (2, 1, 1, 2, 2, 12), (19, 1, 2, 1, 3, 13), (3, 1, 2, 2, 4, 14), (4, 2, 1, 1, 5, 15), (5, 2, 1, 2, 6, 16), (6, 2, 2, 1, 7, 17), (7, 2, 2, 2, 8, 18), (8, 2, 999, 2, 8, 18), (9, 0, 1, 1, 9, 19), (10, 1, 1, 2, 10, 20), (11, 3, 1, 1, 1, 11), (12, 3, 1, 2, 2, 12), (13, 3, 2, 1, 3, 13), (14, 3, 2, 2, 4, 14), (15, 4, 1, 1, 1, 11), (16, 4, 1, 2, 2, 12), (17, 4, 2, 1, 3, 13), (18, 4, 2, 2, 4, 14); +SELECT * FROM test_method where h1 = 1; + h1 | a | b | c | d | e +----+---+---+---+---+---- + 1 | 1 | 1 | 1 | 1 | 11 +(1 row) + +SELECT * FROM test_method where h1 = 1 AND a = 1; + h1 | a | b | c | d | e +----+---+---+---+---+---- + 1 | 1 | 1 | 1 | 1 | 11 +(1 row) + +SELECT * FROM test_method where h1 = 1 AND b = 2; + h1 | a | b | c | d | e +----+---+---+---+---+--- +(0 rows) + +SELECT * FROM test_method where h1 = 2 AND a = 1 AND b = 1; + h1 | a | b | c | d | e +----+---+---+---+---+---- + 2 | 1 | 1 | 2 | 2 | 12 +(1 row) + +SELECT * FROM test_method where h1 = 1 AND a IN (1, 2, 3, 4); + h1 | a | b | c | d | e +----+---+---+---+---+---- + 1 | 1 | 1 | 1 | 1 | 11 +(1 row) + +SELECT * FROM test_method where h1 = 1 AND a IN (1, 2, 3, 4) AND b IN (1, 2, 3, 4); + h1 | a | b | c | d | e +----+---+---+---+---+---- + 1 | 1 | 1 | 1 | 1 | 11 +(1 row) + +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 IN (1, 2, 3, 4); + QUERY PLAN +-------------------------------------------------------------------------- + Index 
Scan using test_method_pkey on test_method (actual rows=4 loops=1) + Index Cond: (h1 = ANY ('{1,2,3,4}'::integer[])) + Storage Index Read Requests: 4 +(3 rows) + +SELECT * FROM test_method where h1 IN (1, 2, 3, 4); + h1 | a | b | c | d | e +----+---+---+---+---+---- + 1 | 1 | 1 | 1 | 1 | 11 + 2 | 1 | 1 | 2 | 2 | 12 + 3 | 1 | 2 | 2 | 4 | 14 + 4 | 2 | 1 | 1 | 5 | 15 +(4 rows) + +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND a = 1; + QUERY PLAN +-------------------------------------------------------------------------- + Index Scan using test_method_pkey on test_method (actual rows=3 loops=1) + Index Cond: (h1 = ANY ('{1,2,3,4}'::integer[])) + Remote Filter: (a = 1) + Storage Index Read Requests: 4 +(4 rows) + +SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND a = 1; + h1 | a | b | c | d | e +----+---+---+---+---+---- + 1 | 1 | 1 | 1 | 1 | 11 + 2 | 1 | 1 | 2 | 2 | 12 + 3 | 1 | 2 | 2 | 4 | 14 +(3 rows) + +set yb_enable_hash_batch_in = true; +SELECT * FROM test_method where h1 = 1; + h1 | a | b | c | d | e +----+---+---+---+---+---- + 1 | 1 | 1 | 1 | 1 | 11 +(1 row) + +SELECT * FROM test_method where h1 = 1 AND a = 1; + h1 | a | b | c | d | e +----+---+---+---+---+---- + 1 | 1 | 1 | 1 | 1 | 11 +(1 row) + +SELECT * FROM test_method where h1 = 1 AND b = 2; + h1 | a | b | c | d | e +----+---+---+---+---+--- +(0 rows) + +SELECT * FROM test_method where h1 = 2 AND a = 1 AND b = 1; + h1 | a | b | c | d | e +----+---+---+---+---+---- + 2 | 1 | 1 | 2 | 2 | 12 +(1 row) + +SELECT * FROM test_method where h1 = 1 AND a IN (1, 2, 3, 4); + h1 | a | b | c | d | e +----+---+---+---+---+---- + 1 | 1 | 1 | 1 | 1 | 11 +(1 row) + +SELECT * FROM test_method where h1 = 1 AND a IN (1, 2, 3, 4) AND b IN (1, 2, 3, 4); + h1 | a | b | c | d | e +----+---+---+---+---+---- + 1 | 1 | 1 | 1 | 1 | 11 +(1 row) + +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 IN (1, 2, 3, 4); + 
QUERY PLAN +-------------------------------------------------------------------------- + Index Scan using test_method_pkey on test_method (actual rows=4 loops=1) + Index Cond: (h1 = ANY ('{1,2,3,4}'::integer[])) + Storage Index Read Requests: 3 +(3 rows) + +SELECT * FROM test_method where h1 IN (1, 2, 3, 4); + h1 | a | b | c | d | e +----+---+---+---+---+---- + 1 | 1 | 1 | 1 | 1 | 11 + 4 | 2 | 1 | 1 | 5 | 15 + 2 | 1 | 1 | 2 | 2 | 12 + 3 | 1 | 2 | 2 | 4 | 14 +(4 rows) + +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND a = 1; + QUERY PLAN +-------------------------------------------------------------------------- + Index Scan using test_method_pkey on test_method (actual rows=3 loops=1) + Index Cond: (h1 = ANY ('{1,2,3,4}'::integer[])) + Remote Filter: (a = 1) + Storage Index Read Requests: 3 +(4 rows) + +SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND a = 1; + h1 | a | b | c | d | e +----+---+---+---+---+---- + 1 | 1 | 1 | 1 | 1 | 11 + 2 | 1 | 1 | 2 | 2 | 12 + 3 | 1 | 2 | 2 | 4 | 14 +(3 rows) + +DROP TABLE test_method; +-- Testing IN queries on range keys +set yb_enable_hash_batch_in = false; +CREATE TABLE test_method (r1 int, a int, b int, c int, d int, e int, PRIMARY KEY (r1 ASC)); +INSERT INTO test_method VALUES (1, 1, 1, 1, 1, 11), (2, 1, 1, 2, 2, 12), (19, 1, 2, 1, 3, 13), (3, 1, 2, 2, 4, 14), (4, 2, 1, 1, 5, 15), (5, 2, 1, 2, 6, 16), (6, 2, 2, 1, 7, 17), (7, 2, 2, 2, 8, 18), (8, 2, 999, 2, 8, 18), (9, 0, 1, 1, 9, 19), (10, 1, 1, 2, 10, 20), (11, 3, 1, 1, 1, 11), (12, 3, 1, 2, 2, 12), (13, 3, 2, 1, 3, 13), (14, 3, 2, 2, 4, 14), (15, 4, 1, 1, 1, 11), (16, 4, 1, 2, 2, 12), (17, 4, 2, 1, 3, 13), (18, 4, 2, 2, 4, 14); +SELECT * FROM test_method where r1 = 1; + r1 | a | b | c | d | e +----+---+---+---+---+---- + 1 | 1 | 1 | 1 | 1 | 11 +(1 row) + +SELECT * FROM test_method where r1 = 1 AND a = 1; + r1 | a | b | c | d | e +----+---+---+---+---+---- + 1 | 1 | 1 | 1 | 1 | 11 +(1 row) + +SELECT * 
FROM test_method where r1 = 1 AND b = 2; + r1 | a | b | c | d | e +----+---+---+---+---+--- +(0 rows) + +SELECT * FROM test_method where r1 = 2 AND a = 1 AND b = 1; + r1 | a | b | c | d | e +----+---+---+---+---+---- + 2 | 1 | 1 | 2 | 2 | 12 +(1 row) + +SELECT * FROM test_method where r1 = 1 AND a IN (1, 2, 3, 4); + r1 | a | b | c | d | e +----+---+---+---+---+---- + 1 | 1 | 1 | 1 | 1 | 11 +(1 row) + +SELECT * FROM test_method where r1 = 1 AND a IN (1, 2, 3, 4) AND b IN (1, 2, 3, 4); + r1 | a | b | c | d | e +----+---+---+---+---+---- + 1 | 1 | 1 | 1 | 1 | 11 +(1 row) + +SELECT * FROM test_method where r1 IN (1, 2, 3, 4); + r1 | a | b | c | d | e +----+---+---+---+---+---- + 1 | 1 | 1 | 1 | 1 | 11 + 2 | 1 | 1 | 2 | 2 | 12 + 3 | 1 | 2 | 2 | 4 | 14 + 4 | 2 | 1 | 1 | 5 | 15 +(4 rows) + +SELECT * FROM test_method where r1 IN (1, 2, 3, 4) AND a = 1; + r1 | a | b | c | d | e +----+---+---+---+---+---- + 1 | 1 | 1 | 1 | 1 | 11 + 2 | 1 | 1 | 2 | 2 | 12 + 3 | 1 | 2 | 2 | 4 | 14 +(3 rows) + +SELECT * FROM test_method where r1 IN (1, 2, 3, 4) AND b = 2; + r1 | a | b | c | d | e +----+---+---+---+---+---- + 3 | 1 | 2 | 2 | 4 | 14 +(1 row) + +SELECT * FROM test_method where r1 IN (1, 2, 3, 4) AND a = 1 AND b = 1; + r1 | a | b | c | d | e +----+---+---+---+---+---- + 1 | 1 | 1 | 1 | 1 | 11 + 2 | 1 | 1 | 2 | 2 | 12 +(2 rows) + +SELECT * FROM test_method where r1 IN (1, 2, 3, 4) AND a IN (1, 2, 3, 4); + r1 | a | b | c | d | e +----+---+---+---+---+---- + 1 | 1 | 1 | 1 | 1 | 11 + 2 | 1 | 1 | 2 | 2 | 12 + 3 | 1 | 2 | 2 | 4 | 14 + 4 | 2 | 1 | 1 | 5 | 15 +(4 rows) + +SELECT * FROM test_method where r1 IN (1, 2, 3, 4) AND a IN (1, 2, 3, 4) AND b IN (1, 2, 3, 4); + r1 | a | b | c | d | e +----+---+---+---+---+---- + 1 | 1 | 1 | 1 | 1 | 11 + 2 | 1 | 1 | 2 | 2 | 12 + 3 | 1 | 2 | 2 | 4 | 14 + 4 | 2 | 1 | 1 | 5 | 15 +(4 rows) + +set yb_enable_hash_batch_in = true; +SELECT * FROM test_method where r1 = 1; + r1 | a | b | c | d | e +----+---+---+---+---+---- + 1 | 1 | 1 | 1 | 1 | 11 +(1 row) + 
+SELECT * FROM test_method where r1 = 1 AND a = 1; + r1 | a | b | c | d | e +----+---+---+---+---+---- + 1 | 1 | 1 | 1 | 1 | 11 +(1 row) + +SELECT * FROM test_method where r1 = 1 AND b = 2; + r1 | a | b | c | d | e +----+---+---+---+---+--- +(0 rows) + +SELECT * FROM test_method where r1 = 2 AND a = 1 AND b = 1; + r1 | a | b | c | d | e +----+---+---+---+---+---- + 2 | 1 | 1 | 2 | 2 | 12 +(1 row) + +SELECT * FROM test_method where r1 = 1 AND a IN (1, 2, 3, 4); + r1 | a | b | c | d | e +----+---+---+---+---+---- + 1 | 1 | 1 | 1 | 1 | 11 +(1 row) + +SELECT * FROM test_method where r1 = 1 AND a IN (1, 2, 3, 4) AND b IN (1, 2, 3, 4); + r1 | a | b | c | d | e +----+---+---+---+---+---- + 1 | 1 | 1 | 1 | 1 | 11 +(1 row) + +SELECT * FROM test_method where r1 IN (1, 2, 3, 4); + r1 | a | b | c | d | e +----+---+---+---+---+---- + 1 | 1 | 1 | 1 | 1 | 11 + 2 | 1 | 1 | 2 | 2 | 12 + 3 | 1 | 2 | 2 | 4 | 14 + 4 | 2 | 1 | 1 | 5 | 15 +(4 rows) + +SELECT * FROM test_method where r1 IN (1, 2, 3, 4) AND a = 1; + r1 | a | b | c | d | e +----+---+---+---+---+---- + 1 | 1 | 1 | 1 | 1 | 11 + 2 | 1 | 1 | 2 | 2 | 12 + 3 | 1 | 2 | 2 | 4 | 14 +(3 rows) + +SELECT * FROM test_method where r1 IN (1, 2, 3, 4) AND b = 2; + r1 | a | b | c | d | e +----+---+---+---+---+---- + 3 | 1 | 2 | 2 | 4 | 14 +(1 row) + +SELECT * FROM test_method where r1 IN (1, 2, 3, 4) AND a = 1 AND b = 1; + r1 | a | b | c | d | e +----+---+---+---+---+---- + 1 | 1 | 1 | 1 | 1 | 11 + 2 | 1 | 1 | 2 | 2 | 12 +(2 rows) + +SELECT * FROM test_method where r1 IN (1, 2, 3, 4) AND a IN (1, 2, 3, 4); + r1 | a | b | c | d | e +----+---+---+---+---+---- + 1 | 1 | 1 | 1 | 1 | 11 + 2 | 1 | 1 | 2 | 2 | 12 + 3 | 1 | 2 | 2 | 4 | 14 + 4 | 2 | 1 | 1 | 5 | 15 +(4 rows) + +SELECT * FROM test_method where r1 IN (1, 2, 3, 4) AND a IN (1, 2, 3, 4) AND b IN (1, 2, 3, 4); + r1 | a | b | c | d | e +----+---+---+---+---+---- + 1 | 1 | 1 | 1 | 1 | 11 + 2 | 1 | 1 | 2 | 2 | 12 + 3 | 1 | 2 | 2 | 4 | 14 + 4 | 2 | 1 | 1 | 5 | 15 +(4 rows) + +DROP TABLE 
test_method; +-- Testing IN queries on multi column hash keys +CREATE TABLE test_method (h1 int, h2 int, a int, b int, v1 int, v2 int, PRIMARY KEY ((h1, h2) HASH)); +INSERT INTO test_method VALUES (1, 1, 1, 1, 1, 11), (2, 1, 1, 2, 2, 12), (19, 1, 2, 1, 3, 13), (3, 1, 2, 2, 4, 14), (4, 2, 1, 1, 5, 15), (5, 2, 1, 2, 6, 16), (6, 2, 2, 1, 7, 17), (7, 2, 2, 2, 8, 18), (8, 2, 999, 2, 8, 18), (9, 0, 1, 1, 9, 19), (10, 1, 1, 2, 10, 20), (11, 3, 1, 1, 1, 11), (12, 3, 1, 2, 2, 12), (13, 3, 2, 1, 3, 13), (14, 3, 2, 2, 4, 14), (15, 4, 1, 1, 1, 11), (16, 4, 1, 2, 2, 12), (17, 4, 2, 1, 3, 13), (18, 4, 2, 2, 4, 14); +set yb_enable_hash_batch_in = false; +SELECT * FROM test_method where h1 = 1 ; + h1 | h2 | a | b | v1 | v2 +----+----+---+---+----+---- + 1 | 1 | 1 | 1 | 1 | 11 +(1 row) + +SELECT * FROM test_method where h1 = 1 AND h2 = 2; + h1 | h2 | a | b | v1 | v2 +----+----+---+---+----+---- +(0 rows) + +SELECT * FROM test_method where h1 = 1 AND h2 = 2 AND a = 1; + h1 | h2 | a | b | v1 | v2 +----+----+---+---+----+---- +(0 rows) + +SELECT * FROM test_method where h1 = 1 AND h2 = 2 AND a = 1 AND b = 2; + h1 | h2 | a | b | v1 | v2 +----+----+---+---+----+---- +(0 rows) + +SELECT * FROM test_method where h1 = 1 AND h2 = 1 AND a IN (1, 2, 3, 4) AND b IN (1, 2, 3, 4); + h1 | h2 | a | b | v1 | v2 +----+----+---+---+----+---- + 1 | 1 | 1 | 1 | 1 | 11 +(1 row) + +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2); + QUERY PLAN +----------------------------------------------------------------------------------------- + Index Scan using test_method_pkey on test_method (actual rows=4 loops=1) + Index Cond: ((h1 = ANY ('{1,2,3,4}'::integer[])) AND (h2 = ANY ('{1,2}'::integer[]))) + Storage Index Read Requests: 8 +(3 rows) + +SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2); + h1 | h2 | a | b | v1 | v2 +----+----+---+---+----+---- + 1 | 1 | 1 | 1 | 1 | 11 + 2 | 1 | 1 | 2 | 2 | 12 + 3 | 1 | 2 | 2 
| 4 | 14 + 4 | 2 | 1 | 1 | 5 | 15 +(4 rows) + +set yb_enable_hash_batch_in = true; +SELECT * FROM test_method where h1 = 1 ; + h1 | h2 | a | b | v1 | v2 +----+----+---+---+----+---- + 1 | 1 | 1 | 1 | 1 | 11 +(1 row) + +SELECT * FROM test_method where h1 = 1 AND h2 = 2; + h1 | h2 | a | b | v1 | v2 +----+----+---+---+----+---- +(0 rows) + +SELECT * FROM test_method where h1 = 1 AND h2 = 2 AND a = 1; + h1 | h2 | a | b | v1 | v2 +----+----+---+---+----+---- +(0 rows) + +SELECT * FROM test_method where h1 = 1 AND h2 = 2 AND a = 1 AND b = 2; + h1 | h2 | a | b | v1 | v2 +----+----+---+---+----+---- +(0 rows) + +SELECT * FROM test_method where h1 = 1 AND h2 = 1 AND a IN (1, 2, 3, 4) AND b IN (1, 2, 3, 4); + h1 | h2 | a | b | v1 | v2 +----+----+---+---+----+---- + 1 | 1 | 1 | 1 | 1 | 11 +(1 row) + +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2); + QUERY PLAN +----------------------------------------------------------------------------------------- + Index Scan using test_method_pkey on test_method (actual rows=4 loops=1) + Index Cond: ((h1 = ANY ('{1,2,3,4}'::integer[])) AND (h2 = ANY ('{1,2}'::integer[]))) + Storage Index Read Requests: 2 +(3 rows) + +SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2); + h1 | h2 | a | b | v1 | v2 +----+----+---+---+----+---- + 4 | 2 | 1 | 1 | 5 | 15 + 3 | 1 | 2 | 2 | 4 | 14 + 2 | 1 | 1 | 2 | 2 | 12 + 1 | 1 | 1 | 1 | 1 | 11 +(4 rows) + +DROP TABLE test_method; +-- Testing IN queries on multi column range keys +CREATE TABLE test_method (r1 int, r2 int, a int, b int, v1 int, v2 int, PRIMARY KEY (r1 ASC, r2 ASC)); +INSERT INTO test_method VALUES (1, 1, 1, 1, 1, 11), (2, 1, 1, 2, 2, 12), (19, 1, 2, 1, 3, 13), (3, 1, 2, 2, 4, 14), (4, 2, 1, 1, 5, 15), (5, 2, 1, 2, 6, 16), (6, 2, 2, 1, 7, 17), (7, 2, 2, 2, 8, 18), (8, 2, 999, 2, 8, 18), (9, 0, 1, 1, 9, 19), (10, 1, 1, 2, 10, 20), (11, 3, 1, 1, 1, 11), (12, 3, 1, 2, 2, 12), (13, 3, 2, 1, 3, 13), (14, 
3, 2, 2, 4, 14), (15, 4, 1, 1, 1, 11), (16, 4, 1, 2, 2, 12), (17, 4, 2, 1, 3, 13), (18, 4, 2, 2, 4, 14); +set yb_enable_hash_batch_in = false; +SELECT * FROM test_method where r1 = 1 ; + r1 | r2 | a | b | v1 | v2 +----+----+---+---+----+---- + 1 | 1 | 1 | 1 | 1 | 11 +(1 row) + +SELECT * FROM test_method where r1 = 1 AND r2 = 2; + r1 | r2 | a | b | v1 | v2 +----+----+---+---+----+---- +(0 rows) + +SELECT * FROM test_method where r1 = 1 AND r2 = 2 AND a = 1; + r1 | r2 | a | b | v1 | v2 +----+----+---+---+----+---- +(0 rows) + +SELECT * FROM test_method where r1 = 1 AND r2 = 2 AND a = 1 AND b = 2; + r1 | r2 | a | b | v1 | v2 +----+----+---+---+----+---- +(0 rows) + +SELECT * FROM test_method where r1 = 1 AND r2 = 1 AND a IN (1, 2, 3, 4) AND b IN (1, 2, 3, 4); + r1 | r2 | a | b | v1 | v2 +----+----+---+---+----+---- + 1 | 1 | 1 | 1 | 1 | 11 +(1 row) + +SELECT * FROM test_method where r1 IN (1, 2, 3, 4) AND r2 IN (1, 2); + r1 | r2 | a | b | v1 | v2 +----+----+---+---+----+---- + 1 | 1 | 1 | 1 | 1 | 11 + 2 | 1 | 1 | 2 | 2 | 12 + 3 | 1 | 2 | 2 | 4 | 14 + 4 | 2 | 1 | 1 | 5 | 15 +(4 rows) + +SELECT * FROM test_method where r1 IN (1, 2, 3, 4) AND r2 IN (1, 2) AND a = 1; + r1 | r2 | a | b | v1 | v2 +----+----+---+---+----+---- + 1 | 1 | 1 | 1 | 1 | 11 + 2 | 1 | 1 | 2 | 2 | 12 + 4 | 2 | 1 | 1 | 5 | 15 +(3 rows) + +SELECT * FROM test_method where r1 IN (1, 2, 3, 4) AND r2 IN (1, 2) AND a = 1 AND b = 2; + r1 | r2 | a | b | v1 | v2 +----+----+---+---+----+---- + 2 | 1 | 1 | 2 | 2 | 12 +(1 row) + +SELECT * FROM test_method where r1 IN (1, 2, 3, 4) AND r2 IN (1, 2) AND a IN (1, 2, 3, 4) AND b IN (1, 2, 3, 4); + r1 | r2 | a | b | v1 | v2 +----+----+---+---+----+---- + 1 | 1 | 1 | 1 | 1 | 11 + 2 | 1 | 1 | 2 | 2 | 12 + 3 | 1 | 2 | 2 | 4 | 14 + 4 | 2 | 1 | 1 | 5 | 15 +(4 rows) + +set yb_enable_hash_batch_in = true; +SELECT * FROM test_method where r1 = 1 ; + r1 | r2 | a | b | v1 | v2 +----+----+---+---+----+---- + 1 | 1 | 1 | 1 | 1 | 11 +(1 row) + +SELECT * FROM test_method where r1 = 
1 AND r2 = 2; + r1 | r2 | a | b | v1 | v2 +----+----+---+---+----+---- +(0 rows) + +SELECT * FROM test_method where r1 = 1 AND r2 = 2 AND a = 1; + r1 | r2 | a | b | v1 | v2 +----+----+---+---+----+---- +(0 rows) + +SELECT * FROM test_method where r1 = 1 AND r2 = 2 AND a = 1 AND b = 2; + r1 | r2 | a | b | v1 | v2 +----+----+---+---+----+---- +(0 rows) + +SELECT * FROM test_method where r1 = 1 AND r2 = 1 AND a IN (1, 2, 3, 4) AND b IN (1, 2, 3, 4); + r1 | r2 | a | b | v1 | v2 +----+----+---+---+----+---- + 1 | 1 | 1 | 1 | 1 | 11 +(1 row) + +SELECT * FROM test_method where r1 IN (1, 2, 3, 4) AND r2 IN (1, 2); + r1 | r2 | a | b | v1 | v2 +----+----+---+---+----+---- + 1 | 1 | 1 | 1 | 1 | 11 + 2 | 1 | 1 | 2 | 2 | 12 + 3 | 1 | 2 | 2 | 4 | 14 + 4 | 2 | 1 | 1 | 5 | 15 +(4 rows) + +SELECT * FROM test_method where r1 IN (1, 2, 3, 4) AND r2 IN (1, 2) AND a = 1; + r1 | r2 | a | b | v1 | v2 +----+----+---+---+----+---- + 1 | 1 | 1 | 1 | 1 | 11 + 2 | 1 | 1 | 2 | 2 | 12 + 4 | 2 | 1 | 1 | 5 | 15 +(3 rows) + +SELECT * FROM test_method where r1 IN (1, 2, 3, 4) AND r2 IN (1, 2) AND a = 1 AND b = 2; + r1 | r2 | a | b | v1 | v2 +----+----+---+---+----+---- + 2 | 1 | 1 | 2 | 2 | 12 +(1 row) + +SELECT * FROM test_method where r1 IN (1, 2, 3, 4) AND r2 IN (1, 2) AND a IN (1, 2, 3, 4) AND b IN (1, 2, 3, 4); + r1 | r2 | a | b | v1 | v2 +----+----+---+---+----+---- + 1 | 1 | 1 | 1 | 1 | 11 + 2 | 1 | 1 | 2 | 2 | 12 + 3 | 1 | 2 | 2 | 4 | 14 + 4 | 2 | 1 | 1 | 5 | 15 +(4 rows) + +DROP TABLE test_method; +-- Testing IN queries on multi column hash and range keys +CREATE TABLE test_method (h1 int, h2 int, r1 int, r2 int, v1 int, v2 int, PRIMARY KEY ((h1, h2) HASH, r1, r2)); +INSERT INTO test_method VALUES (1, 1, 1, 1, 1, 11), (1, 1, 1, 2, 2, 12), (1, 1, 2, 1, 3, 13), (1, 1, 2, 2, 4, 14), (1, 2, 1, 1, 5, 15), (1, 2, 1, 2, 6, 16), (1, 2, 2, 1, 7, 17), (1, 2, 2, 2, 8, 18), (1, 2, 999, 2, 8, 18), (2, 0, 1, 1, 9, 19), (2, 1, 1, 2, 10, 20), (1, 3, 1, 1, 1, 11), (1, 3, 1, 2, 2, 12), (1, 3, 2, 1, 3, 13), 
(1, 3, 2, 2, 4, 14), (1, 4, 1, 1, 1, 11), (1, 4, 1, 2, 2, 12), (1, 4, 2, 1, 3, 13), (1, 4, 2, 2, 4, 14); +set yb_enable_hash_batch_in = false; +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 = 1 ; + QUERY PLAN +-------------------------------------------------- + Seq Scan on test_method (actual rows=17 loops=1) + Remote Filter: (h1 = 1) + Storage Table Read Requests: 1 +(3 rows) + +SELECT * FROM test_method where h1 = 1 ; + h1 | h2 | r1 | r2 | v1 | v2 +----+----+-----+----+----+---- + 1 | 1 | 1 | 1 | 1 | 11 + 1 | 1 | 1 | 2 | 2 | 12 + 1 | 1 | 2 | 1 | 3 | 13 + 1 | 1 | 2 | 2 | 4 | 14 + 1 | 2 | 1 | 1 | 5 | 15 + 1 | 2 | 1 | 2 | 6 | 16 + 1 | 2 | 2 | 1 | 7 | 17 + 1 | 2 | 2 | 2 | 8 | 18 + 1 | 2 | 999 | 2 | 8 | 18 + 1 | 4 | 1 | 1 | 1 | 11 + 1 | 4 | 1 | 2 | 2 | 12 + 1 | 4 | 2 | 1 | 3 | 13 + 1 | 4 | 2 | 2 | 4 | 14 + 1 | 3 | 1 | 1 | 1 | 11 + 1 | 3 | 1 | 2 | 2 | 12 + 1 | 3 | 2 | 1 | 3 | 13 + 1 | 3 | 2 | 2 | 4 | 14 +(17 rows) + +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 = 1 AND h2 = 2; + QUERY PLAN +-------------------------------------------------------------------------- + Index Scan using test_method_pkey on test_method (actual rows=5 loops=1) + Index Cond: ((h1 = 1) AND (h2 = 2)) + Storage Index Read Requests: 1 +(3 rows) + +SELECT * FROM test_method where h1 = 1 AND h2 = 2; + h1 | h2 | r1 | r2 | v1 | v2 +----+----+-----+----+----+---- + 1 | 2 | 1 | 1 | 5 | 15 + 1 | 2 | 1 | 2 | 6 | 16 + 1 | 2 | 2 | 1 | 7 | 17 + 1 | 2 | 2 | 2 | 8 | 18 + 1 | 2 | 999 | 2 | 8 | 18 +(5 rows) + +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 = 1 AND h2 = 2 AND r1 = 1; + QUERY PLAN +-------------------------------------------------------------------------- + Index Scan using test_method_pkey on test_method (actual rows=2 loops=1) + Index Cond: ((h1 = 1) AND (h2 = 2) AND (r1 = 1)) + Storage Index Read Requests: 1 +(3 rows) + +SELECT * FROM test_method 
where h1 = 1 AND h2 = 2 AND r1 = 1; + h1 | h2 | r1 | r2 | v1 | v2 +----+----+----+----+----+---- + 1 | 2 | 1 | 1 | 5 | 15 + 1 | 2 | 1 | 2 | 6 | 16 +(2 rows) + +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 = 1 AND h2 = 2 AND r1 = 1 AND r2 = 2; + QUERY PLAN +-------------------------------------------------------------------------- + Index Scan using test_method_pkey on test_method (actual rows=1 loops=1) + Index Cond: ((h1 = 1) AND (h2 = 2) AND (r1 = 1) AND (r2 = 2)) + Storage Index Read Requests: 1 +(3 rows) + +SELECT * FROM test_method where h1 = 1 AND h2 = 2 AND r1 = 1 AND r2 = 2; + h1 | h2 | r1 | r2 | v1 | v2 +----+----+----+----+----+---- + 1 | 2 | 1 | 2 | 6 | 16 +(1 row) + +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 = 1 AND h2 = 1 AND r1 IN (1, 2, 3, 4) AND r2 IN (1, 2, 3, 4); + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------- + Index Scan using test_method_pkey on test_method (actual rows=4 loops=1) + Index Cond: ((h1 = 1) AND (h2 = 1) AND (r1 = ANY ('{1,2,3,4}'::integer[])) AND (r2 = ANY ('{1,2,3,4}'::integer[]))) + Storage Index Read Requests: 1 +(3 rows) + +SELECT * FROM test_method where h1 = 1 AND h2 = 1 AND r1 IN (1, 2, 3, 4) AND r2 IN (1, 2, 3, 4); + h1 | h2 | r1 | r2 | v1 | v2 +----+----+----+----+----+---- + 1 | 1 | 1 | 1 | 1 | 11 + 1 | 1 | 1 | 2 | 2 | 12 + 1 | 1 | 2 | 1 | 3 | 13 + 1 | 1 | 2 | 2 | 4 | 14 +(4 rows) + +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2); + QUERY PLAN +----------------------------------------------------------------------------------------- + Index Scan using test_method_pkey on test_method (actual rows=10 loops=1) + Index Cond: ((h1 = ANY ('{1,2,3,4}'::integer[])) AND (h2 = ANY ('{1,2}'::integer[]))) + Storage Index Read Requests: 8 +(3 rows) + +SELECT * FROM 
test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2); + h1 | h2 | r1 | r2 | v1 | v2 +----+----+-----+----+----+---- + 1 | 1 | 1 | 1 | 1 | 11 + 1 | 1 | 1 | 2 | 2 | 12 + 1 | 1 | 2 | 1 | 3 | 13 + 1 | 1 | 2 | 2 | 4 | 14 + 2 | 1 | 1 | 2 | 10 | 20 + 1 | 2 | 1 | 1 | 5 | 15 + 1 | 2 | 1 | 2 | 6 | 16 + 1 | 2 | 2 | 1 | 7 | 17 + 1 | 2 | 2 | 2 | 8 | 18 + 1 | 2 | 999 | 2 | 8 | 18 +(10 rows) + +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2) AND r1 = 1; + QUERY PLAN +------------------------------------------------------------------------------------------------------ + Index Scan using test_method_pkey on test_method (actual rows=5 loops=1) + Index Cond: ((h1 = ANY ('{1,2,3,4}'::integer[])) AND (h2 = ANY ('{1,2}'::integer[])) AND (r1 = 1)) + Storage Index Read Requests: 8 +(3 rows) + +SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2) AND r1 = 1; + h1 | h2 | r1 | r2 | v1 | v2 +----+----+----+----+----+---- + 1 | 1 | 1 | 1 | 1 | 11 + 1 | 1 | 1 | 2 | 2 | 12 + 2 | 1 | 1 | 2 | 10 | 20 + 1 | 2 | 1 | 1 | 5 | 15 + 1 | 2 | 1 | 2 | 6 | 16 +(5 rows) + +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2) AND r1 = 1 AND r2 = 2; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------- + Index Scan using test_method_pkey on test_method (actual rows=3 loops=1) + Index Cond: ((h1 = ANY ('{1,2,3,4}'::integer[])) AND (h2 = ANY ('{1,2}'::integer[])) AND (r1 = 1) AND (r2 = 2)) + Storage Index Read Requests: 8 +(3 rows) + +SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2) AND r1 = 1 AND r2 = 2; + h1 | h2 | r1 | r2 | v1 | v2 +----+----+----+----+----+---- + 1 | 1 | 1 | 2 | 2 | 12 + 2 | 1 | 1 | 2 | 10 | 20 + 1 | 2 | 1 | 2 | 6 | 16 +(3 rows) + +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 IN 
(1, 2, 3, 4) AND h2 IN (1, 2) AND r1 IN (1, 2, 3, 4) AND r2 IN (1, 2, 3, 4); + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Index Scan using test_method_pkey on test_method (actual rows=9 loops=1) + Index Cond: ((h1 = ANY ('{1,2,3,4}'::integer[])) AND (h2 = ANY ('{1,2}'::integer[])) AND (r1 = ANY ('{1,2,3,4}'::integer[])) AND (r2 = ANY ('{1,2,3,4}'::integer[]))) + Storage Index Read Requests: 8 +(3 rows) + +SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2) AND r1 IN (1, 2, 3, 4) AND r2 IN (1, 2, 3, 4); + h1 | h2 | r1 | r2 | v1 | v2 +----+----+----+----+----+---- + 1 | 1 | 1 | 1 | 1 | 11 + 1 | 1 | 1 | 2 | 2 | 12 + 1 | 1 | 2 | 1 | 3 | 13 + 1 | 1 | 2 | 2 | 4 | 14 + 2 | 1 | 1 | 2 | 10 | 20 + 1 | 2 | 1 | 1 | 5 | 15 + 1 | 2 | 1 | 2 | 6 | 16 + 1 | 2 | 2 | 1 | 7 | 17 + 1 | 2 | 2 | 2 | 8 | 18 +(9 rows) + +set yb_enable_hash_batch_in = true; +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 = 1 ; + QUERY PLAN +-------------------------------------------------- + Seq Scan on test_method (actual rows=17 loops=1) + Remote Filter: (h1 = 1) + Storage Table Read Requests: 1 +(3 rows) + +SELECT * FROM test_method where h1 = 1 ; + h1 | h2 | r1 | r2 | v1 | v2 +----+----+-----+----+----+---- + 1 | 1 | 1 | 1 | 1 | 11 + 1 | 1 | 1 | 2 | 2 | 12 + 1 | 1 | 2 | 1 | 3 | 13 + 1 | 1 | 2 | 2 | 4 | 14 + 1 | 2 | 1 | 1 | 5 | 15 + 1 | 2 | 1 | 2 | 6 | 16 + 1 | 2 | 2 | 1 | 7 | 17 + 1 | 2 | 2 | 2 | 8 | 18 + 1 | 2 | 999 | 2 | 8 | 18 + 1 | 4 | 1 | 1 | 1 | 11 + 1 | 4 | 1 | 2 | 2 | 12 + 1 | 4 | 2 | 1 | 3 | 13 + 1 | 4 | 2 | 2 | 4 | 14 + 1 | 3 | 1 | 1 | 1 | 11 + 1 | 3 | 1 | 2 | 2 | 12 + 1 | 3 | 2 | 1 | 3 | 13 + 1 | 3 | 2 | 2 | 4 | 14 +(17 rows) + +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 = 1 AND h2 = 2; + QUERY PLAN 
+-------------------------------------------------------------------------- + Index Scan using test_method_pkey on test_method (actual rows=5 loops=1) + Index Cond: ((h1 = 1) AND (h2 = 2)) + Storage Index Read Requests: 1 +(3 rows) + +SELECT * FROM test_method where h1 = 1 AND h2 = 2; + h1 | h2 | r1 | r2 | v1 | v2 +----+----+-----+----+----+---- + 1 | 2 | 1 | 1 | 5 | 15 + 1 | 2 | 1 | 2 | 6 | 16 + 1 | 2 | 2 | 1 | 7 | 17 + 1 | 2 | 2 | 2 | 8 | 18 + 1 | 2 | 999 | 2 | 8 | 18 +(5 rows) + +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 = 1 AND h2 = 2 AND r1 = 1; + QUERY PLAN +-------------------------------------------------------------------------- + Index Scan using test_method_pkey on test_method (actual rows=2 loops=1) + Index Cond: ((h1 = 1) AND (h2 = 2) AND (r1 = 1)) + Storage Index Read Requests: 1 +(3 rows) + +SELECT * FROM test_method where h1 = 1 AND h2 = 2 AND r1 = 1; + h1 | h2 | r1 | r2 | v1 | v2 +----+----+----+----+----+---- + 1 | 2 | 1 | 1 | 5 | 15 + 1 | 2 | 1 | 2 | 6 | 16 +(2 rows) + +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 = 1 AND h2 = 2 AND r1 = 1 AND r2 = 2; + QUERY PLAN +-------------------------------------------------------------------------- + Index Scan using test_method_pkey on test_method (actual rows=1 loops=1) + Index Cond: ((h1 = 1) AND (h2 = 2) AND (r1 = 1) AND (r2 = 2)) + Storage Index Read Requests: 1 +(3 rows) + +SELECT * FROM test_method where h1 = 1 AND h2 = 2 AND r1 = 1 AND r2 = 2; + h1 | h2 | r1 | r2 | v1 | v2 +----+----+----+----+----+---- + 1 | 2 | 1 | 2 | 6 | 16 +(1 row) + +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 = 1 AND h2 = 1 AND r1 IN (1, 2, 3, 4) AND r2 IN (1, 2, 3, 4); + QUERY PLAN +----------------------------------------------------------------------------------------------------------------------- + Index Scan using test_method_pkey on test_method (actual rows=4 
loops=1) + Index Cond: ((h1 = 1) AND (h2 = 1) AND (r1 = ANY ('{1,2,3,4}'::integer[])) AND (r2 = ANY ('{1,2,3,4}'::integer[]))) + Storage Index Read Requests: 1 +(3 rows) + +SELECT * FROM test_method where h1 = 1 AND h2 = 1 AND r1 IN (1, 2, 3, 4) AND r2 IN (1, 2, 3, 4); + h1 | h2 | r1 | r2 | v1 | v2 +----+----+----+----+----+---- + 1 | 1 | 1 | 1 | 1 | 11 + 1 | 1 | 1 | 2 | 2 | 12 + 1 | 1 | 2 | 1 | 3 | 13 + 1 | 1 | 2 | 2 | 4 | 14 +(4 rows) + +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2); + QUERY PLAN +----------------------------------------------------------------------------------------- + Index Scan using test_method_pkey on test_method (actual rows=10 loops=1) + Index Cond: ((h1 = ANY ('{1,2,3,4}'::integer[])) AND (h2 = ANY ('{1,2}'::integer[]))) + Storage Index Read Requests: 2 +(3 rows) + +SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2); + h1 | h2 | r1 | r2 | v1 | v2 +----+----+-----+----+----+---- + 2 | 1 | 1 | 2 | 10 | 20 + 1 | 1 | 1 | 1 | 1 | 11 + 1 | 1 | 1 | 2 | 2 | 12 + 1 | 1 | 2 | 1 | 3 | 13 + 1 | 1 | 2 | 2 | 4 | 14 + 1 | 2 | 1 | 1 | 5 | 15 + 1 | 2 | 1 | 2 | 6 | 16 + 1 | 2 | 2 | 1 | 7 | 17 + 1 | 2 | 2 | 2 | 8 | 18 + 1 | 2 | 999 | 2 | 8 | 18 +(10 rows) + +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2) AND r1 = 1; + QUERY PLAN +------------------------------------------------------------------------------------------------------ + Index Scan using test_method_pkey on test_method (actual rows=5 loops=1) + Index Cond: ((h1 = ANY ('{1,2,3,4}'::integer[])) AND (h2 = ANY ('{1,2}'::integer[])) AND (r1 = 1)) + Storage Index Read Requests: 2 +(3 rows) + +SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2) AND r1 = 1; + h1 | h2 | r1 | r2 | v1 | v2 +----+----+----+----+----+---- + 2 | 1 | 1 | 2 | 10 | 20 + 1 | 1 | 1 | 1 | 1 | 11 + 1 | 1 | 1 | 2 | 2 | 12 + 1 | 2 | 1 | 1 | 
5 | 15 + 1 | 2 | 1 | 2 | 6 | 16 +(5 rows) + +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2) AND r1 = 1 AND r2 = 2; + QUERY PLAN +------------------------------------------------------------------------------------------------------------------- + Index Scan using test_method_pkey on test_method (actual rows=3 loops=1) + Index Cond: ((h1 = ANY ('{1,2,3,4}'::integer[])) AND (h2 = ANY ('{1,2}'::integer[])) AND (r1 = 1) AND (r2 = 2)) + Storage Index Read Requests: 2 +(3 rows) + +SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2) AND r1 = 1 AND r2 = 2; + h1 | h2 | r1 | r2 | v1 | v2 +----+----+----+----+----+---- + 2 | 1 | 1 | 2 | 10 | 20 + 1 | 1 | 1 | 2 | 2 | 12 + 1 | 2 | 1 | 2 | 6 | 16 +(3 rows) + +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2) AND r1 IN (1, 2, 3, 4) AND r2 IN (1, 2, 3, 4); + QUERY PLAN +------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + Index Scan using test_method_pkey on test_method (actual rows=9 loops=1) + Index Cond: ((h1 = ANY ('{1,2,3,4}'::integer[])) AND (h2 = ANY ('{1,2}'::integer[])) AND (r1 = ANY ('{1,2,3,4}'::integer[])) AND (r2 = ANY ('{1,2,3,4}'::integer[]))) + Storage Index Read Requests: 2 +(3 rows) + +SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2) AND r1 IN (1, 2, 3, 4) AND r2 IN (1, 2, 3, 4); + h1 | h2 | r1 | r2 | v1 | v2 +----+----+----+----+----+---- + 2 | 1 | 1 | 2 | 10 | 20 + 1 | 1 | 1 | 1 | 1 | 11 + 1 | 1 | 1 | 2 | 2 | 12 + 1 | 1 | 2 | 1 | 3 | 13 + 1 | 1 | 2 | 2 | 4 | 14 + 1 | 2 | 1 | 1 | 5 | 15 + 1 | 2 | 1 | 2 | 6 | 16 + 1 | 2 | 2 | 1 | 7 | 17 + 1 | 2 | 2 | 2 | 8 | 18 +(9 rows) + +DROP TABLE test_method; diff --git a/src/postgres/src/test/regress/expected/yb_join_batching.out 
b/src/postgres/src/test/regress/expected/yb_join_batching.out index 99cb2994c40b..74403a9de91c 100644 --- a/src/postgres/src/test/regress/expected/yb_join_batching.out +++ b/src/postgres/src/test/regress/expected/yb_join_batching.out @@ -38,12 +38,12 @@ SELECT * FROM p1 t1 JOIN p2 t2 ON t1.a = t2.a WHERE t1.a <= 100 AND t2.a <= 100; a | b | c | a | b | c ----+----+------+----+----+------ 78 | 3 | 0078 | 78 | 3 | 0078 - 12 | 12 | 0012 | 12 | 12 | 0012 90 | 15 | 0090 | 90 | 15 | 0090 + 12 | 12 | 0012 | 12 | 12 | 0012 6 | 6 | 0006 | 6 | 6 | 0006 + 96 | 21 | 0096 | 96 | 21 | 0096 42 | 17 | 0042 | 42 | 17 | 0042 48 | 23 | 0048 | 48 | 23 | 0048 - 96 | 21 | 0096 | 96 | 21 | 0096 60 | 10 | 0060 | 60 | 10 | 0060 72 | 22 | 0072 | 72 | 22 | 0072 36 | 11 | 0036 | 36 | 11 | 0036 @@ -74,8 +74,8 @@ SELECT * FROM p1 t1 JOIN p2 t2 ON t1.a = t2.a + 1 WHERE t1.a <= 100 AND t2.a <= 46 | 21 | 0046 | 45 | 20 | 0045 82 | 7 | 0082 | 81 | 6 | 0081 22 | 22 | 0022 | 21 | 21 | 0021 - 16 | 16 | 0016 | 15 | 15 | 0015 52 | 2 | 0052 | 51 | 1 | 0051 + 16 | 16 | 0016 | 15 | 15 | 0015 10 | 10 | 0010 | 9 | 9 | 0009 70 | 20 | 0070 | 69 | 19 | 0069 88 | 13 | 0088 | 87 | 12 | 0087 @@ -83,10 +83,10 @@ SELECT * FROM p1 t1 JOIN p2 t2 ON t1.a = t2.a + 1 WHERE t1.a <= 100 AND t2.a <= 100 | 0 | 0100 | 99 | 24 | 0099 64 | 14 | 0064 | 63 | 13 | 0063 76 | 1 | 0076 | 75 | 0 | 0075 - 28 | 3 | 0028 | 27 | 2 | 0027 94 | 19 | 0094 | 93 | 18 | 0093 - 34 | 9 | 0034 | 33 | 8 | 0033 + 28 | 3 | 0028 | 27 | 2 | 0027 40 | 15 | 0040 | 39 | 14 | 0039 + 34 | 9 | 0034 | 33 | 8 | 0033 4 | 4 | 0004 | 3 | 3 | 0003 (17 rows) @@ -125,7 +125,7 @@ SELECT * FROM p1 t1 JOIN p2 t2 ON t1.a - 1 = t2.a + 1 WHERE t1.a <= 100 AND t2.a -- Batching on compound clauses /*+ Leading((p2 p1)) */ EXPLAIN (COSTS OFF) SELECT * FROM p1 JOIN p2 ON p1.a = p2.b AND p2.a = p1.b; - QUERY PLAN + QUERY PLAN ------------------------------------------------------------------------------------------ YB Batched Nested Loop Join Join Filter: ((p1.a = p2.b) AND 
(p1.b = p2.a)) @@ -135,7 +135,7 @@ SELECT * FROM p1 t1 JOIN p2 t2 ON t1.a - 1 = t2.a + 1 WHERE t1.a <= 100 AND t2.a (5 rows) /*+ Leading((p2 p1)) */ SELECT * FROM p1 JOIN p2 ON p1.a = p2.b AND p2.a = p1.b; - a | b | c | a | b | c + a | b | c | a | b | c ----+----+------+----+----+------ 12 | 12 | 0012 | 12 | 12 | 0012 6 | 6 | 0006 | 6 | 6 | 0006 @@ -145,7 +145,7 @@ SELECT * FROM p1 t1 JOIN p2 t2 ON t1.a - 1 = t2.a + 1 WHERE t1.a <= 100 AND t2.a (5 rows) explain (costs off) select * from p1 left join p5 on p1.a - 1 = p5.a and p1.b - 1 = p5.b where p1.a <= 30; - QUERY PLAN + QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------ YB Batched Nested Loop Left Join Join Filter: (((p1.a - 1) = p5.a) AND ((p1.b - 1) = p5.b)) @@ -156,30 +156,30 @@ explain (costs off) select * from p1 left join p5 on p1.a - 1 = p5.a and p1.b - (6 rows) select * from p1 left join p5 on p1.a - 1 = p5.a and p1.b - 1 = p5.b where p1.a <= 30; - a | b | c | a | b | c + a | b | c | a | b | c ----+----+------+----+---+------ 6 | 6 | 0006 | 5 | 5 | 0055 12 | 12 | 0012 | | | 16 | 16 | 0016 | | | 4 | 4 | 0004 | 3 | 3 | 0033 10 | 10 | 0010 | 9 | 9 | 0099 - 18 | 18 | 0018 | | | + 18 | 18 | 0018 | | | 2 | 2 | 0002 | 1 | 1 | 0011 30 | 5 | 0030 | 29 | 4 | 0294 - 0 | 0 | 0000 | | | + 0 | 0 | 0000 | | | 8 | 8 | 0008 | 7 | 7 | 0077 - 14 | 14 | 0014 | | | - 22 | 22 | 0022 | | | + 14 | 14 | 0014 | | | + 22 | 22 | 0022 | | | 26 | 1 | 0026 | 25 | 0 | 0250 28 | 3 | 0028 | 27 | 2 | 0272 - 24 | 24 | 0024 | | | - 20 | 20 | 0020 | | | + 24 | 24 | 0024 | | | + 20 | 20 | 0020 | | | (16 rows) -- Batching should still be disabled if there is a filter -- clause on a batched relation. 
/*+ set(enable_seqscan on) IndexScan(p1 p1_b_idx) Leading((p2 p1)) */ EXPLAIN (COSTS OFF) SELECT * FROM p1 JOIN p2 ON p1.a = p2.b AND p2.a = p1.b; - QUERY PLAN + QUERY PLAN --------------------------------------- Nested Loop -> Seq Scan on p2 @@ -189,7 +189,7 @@ select * from p1 left join p5 on p1.a - 1 = p5.a and p1.b - 1 = p5.b where p1.a (5 rows) /*+ set(enable_seqscan on) IndexScan(p1 p1_b_idx) Leading((p2 p3)) */ EXPLAIN (COSTS OFF) SELECT * FROM p1, p2, p3 where p1.a = p3.a AND p2.a = p3.a and p1.b = p2.b; - QUERY PLAN + QUERY PLAN ----------------------------------------------------------- Nested Loop Join Filter: (p3.a = p1.a) @@ -204,7 +204,7 @@ select * from p1 left join p5 on p1.a - 1 = p5.a and p1.b - 1 = p5.b where p1.a (10 rows) /*+ set(enable_seqscan on) Leading((p2 p1)) */ EXPLAIN (COSTS OFF) SELECT * FROM p1 JOIN p2 ON p1.a = p2.b AND p1.b < p2.b + 1; - QUERY PLAN + QUERY PLAN ------------------------------------------------------- Nested Loop -> Seq Scan on p2 @@ -213,7 +213,7 @@ select * from p1 left join p5 on p1.a - 1 = p5.a and p1.b - 1 = p5.b where p1.a (4 rows) /*+IndexScan(p5 p5_hash)*/explain (costs off) select * from p1 left join p5 on p1.a - 1 = p5.a and p1.b - 1 = p5.b where p1.a <= 30; - QUERY PLAN + QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------ YB Batched Nested Loop Left Join Join Filter: (((p1.a - 1) = p5.a) AND ((p1.b - 1) = p5.b)) @@ -224,28 +224,28 @@ select * from p1 left join p5 on p1.a - 1 = p5.a and p1.b - 1 = p5.b where p1.a (6 rows) /*+IndexScan(p5 p5_hash)*/ select * from p1 left join p5 on p1.a - 1 = p5.a and p1.b - 1 = p5.b where p1.a <= 30; - a | b | c | a | b | c + a | b | c | a | b | c ----+----+------+----+---+------ 6 | 6 | 0006 | 5 | 5 | 0055 12 | 12 | 0012 | | | 16 | 16 | 0016 | | | - 4 | 4 | 0004 | 3 | 3 | 0033 10 | 10 | 0010 | 9 | 9 | 0099 - 18 | 18 | 0018 | | | - 2 | 2 | 0002 | 1 | 1 | 0011 + 4 | 4 | 0004 | 3 | 3 | 0033 
+ 18 | 18 | 0018 | | | 30 | 5 | 0030 | 29 | 4 | 0294 + 2 | 2 | 0002 | 1 | 1 | 0011 0 | 0 | 0000 | | | 8 | 8 | 0008 | 7 | 7 | 0077 14 | 14 | 0014 | | | 22 | 22 | 0022 | | | - 26 | 1 | 0026 | 25 | 0 | 0250 28 | 3 | 0028 | 27 | 2 | 0272 + 26 | 1 | 0026 | 25 | 0 | 0250 24 | 24 | 0024 | | | 20 | 20 | 0020 | | | (16 rows) /*+IndexScan(p5 p5_hash_asc)*/explain (costs off) select * from p1 left join p5 on p1.a - 1 = p5.a and p1.b - 1 = p5.b where p1.a <= 30; - QUERY PLAN + QUERY PLAN ------------------------------------------------------------------------------------------------------------------------------ YB Batched Nested Loop Left Join Join Filter: (((p1.a - 1) = p5.a) AND ((p1.b - 1) = p5.b)) @@ -256,24 +256,24 @@ select * from p1 left join p5 on p1.a - 1 = p5.a and p1.b - 1 = p5.b where p1.a (6 rows) /*+IndexScan(p5 p5_hash_asc)*/ select * from p1 left join p5 on p1.a - 1 = p5.a and p1.b - 1 = p5.b where p1.a <= 30; - a | b | c | a | b | c + a | b | c | a | b | c ----+----+------+----+---+------ 6 | 6 | 0006 | 5 | 5 | 0055 12 | 12 | 0012 | | | 16 | 16 | 0016 | | | - 4 | 4 | 0004 | 3 | 3 | 0033 10 | 10 | 0010 | 9 | 9 | 0099 - 18 | 18 | 0018 | | | + 4 | 4 | 0004 | 3 | 3 | 0033 + 18 | 18 | 0018 | | | 2 | 2 | 0002 | 1 | 1 | 0011 30 | 5 | 0030 | 29 | 4 | 0294 - 0 | 0 | 0000 | | | + 0 | 0 | 0000 | | | 8 | 8 | 0008 | 7 | 7 | 0077 - 14 | 14 | 0014 | | | - 22 | 22 | 0022 | | | - 26 | 1 | 0026 | 25 | 0 | 0250 + 14 | 14 | 0014 | | | + 22 | 22 | 0022 | | | 28 | 3 | 0028 | 27 | 2 | 0272 - 24 | 24 | 0024 | | | - 20 | 20 | 0020 | | | + 26 | 1 | 0026 | 25 | 0 | 0250 + 24 | 24 | 0024 | | | + 20 | 20 | 0020 | | | (16 rows) /*+ set(enable_seqscan true) Leading((p2 p1)) IndexScan(p1 p1_b_idx) */ EXPLAIN (COSTS OFF) SELECT * FROM p1 JOIN p2 ON p1.a = p2.b AND p2.a = p1.b; @@ -300,7 +300,7 @@ INSERT INTO t11 VALUES (1,2,0), (1,3,0), (5,2,0), (5,3,0), (5,4,0); CREATE TABLE t12 (c4 int, c2 int, y int); INSERT INTO t12 VALUES (3,7,0),(6,9,0),(9,7,0),(4,9,0); EXPLAIN (COSTS OFF) /*+ 
Leading((t12 (t11 t10))) Set(enable_seqscan true) */ SELECT t10.* FROM t12, t11, t10 WHERE x = y AND c1 = r1 AND c2 = r2 AND c3 = r3 AND c4 = r4 order by c1, c2, c3, c4; - QUERY PLAN + QUERY PLAN -------------------------------------------------------------------------------------------------------------------------------------------------- Sort Sort Key: t10.r1, t10.r2, t10.r3, t10.r4 @@ -314,7 +314,7 @@ EXPLAIN (COSTS OFF) /*+ Leading((t12 (t11 t10))) Set(enable_seqscan true) */ SEL (9 rows) /*+ Leading((t12 (t11 t10))) Set(enable_seqscan true) */ SELECT t10.* FROM t12, t11, t10 WHERE x = y AND c1 = r1 AND c2 = r2 AND c3 = r3 AND c4 = r4 order by c1, c2, c3, c4; - r1 | r2 | r3 | r4 + r1 | r2 | r3 | r4 ----+----+----+---- 1 | 7 | 2 | 3 1 | 7 | 2 | 9 @@ -346,7 +346,7 @@ create table d2(a int, primary key(a)); create table d3(a int, primary key(a)); create table d4(a int, primary key(a)); /*+Leading(((d2 (d3 d4)) d1))*/ explain (costs off) select * from d1,d2,d3,d4 where d1.a = d3.a and d2.a = d3.a and d4.a = d2.a; - QUERY PLAN + QUERY PLAN ----------------------------------------------------------------- YB Batched Nested Loop Join Join Filter: (d3.a = d1.a) @@ -398,7 +398,7 @@ SELECT * FROM p3 t3 LEFT OUTER JOIN (SELECT t1.a as a FROM p1 t1 JOIN p2 t2 ON t (8 rows) EXPLAIN (COSTS OFF) SELECT * FROM p3 t3 RIGHT OUTER JOIN (SELECT t1.a as a FROM p1 t1 JOIN p2 t2 ON t1.a = t2.b WHERE t1.b <= 10 AND t2.b <= 15) s ON t3.a = s.a; - QUERY PLAN + QUERY PLAN --------------------------------------------------------------------------- YB Batched Nested Loop Left Join Join Filter: (t3.a = t1.a) @@ -421,29 +421,29 @@ SELECT * FROM p3 t3 RIGHT OUTER JOIN (SELECT t1.a as a FROM p1 t1 JOIN p2 t2 ON 10 | 10 | 0010 | 10 10 | 10 | 0010 | 10 | | | 8 - | | | 4 + 10 | 10 | 0010 | 10 | | | 2 | | | 2 0 | 0 | 0000 | 0 0 | 0 | 0000 | 0 - 10 | 10 | 0010 | 10 + | | | 4 0 | 0 | 0000 | 0 | | | 4 | | | 8 - | | | 6 - | | | 4 - | | | 4 10 | 10 | 0010 | 10 + | | | 4 + | | | 6 10 | 10 | 0010 | 10 + 
| | | 4 | | | 4 | | | 2 | | | 8 | | | 6 - | | | 4 + 10 | 10 | 0010 | 10 | | | 2 | | | 8 0 | 0 | 0000 | 0 - 10 | 10 | 0010 | 10 + | | | 4 | | | 6 0 | 0 | 0000 | 0 | | | 4 @@ -568,10 +568,10 @@ SELECT * FROM p1 t1 WHERE EXISTS (SELECT 1 FROM p2 t2 WHERE t1.a = t2.b) AND t1. 22 | 22 | 0022 20 | 20 | 0020 10 | 10 | 0010 - 2 | 2 | 0002 18 | 18 | 0018 - 6 | 6 | 0006 + 2 | 2 | 0002 16 | 16 | 0016 + 6 | 6 | 0006 12 | 12 | 0012 (13 rows) @@ -668,7 +668,7 @@ INSERT INTO s1 select i,i,i from generate_series(1,10) i; INSERT INTO s2 select i,i,i from generate_series(1,10) i; INSERT INTO s3 select i,i from generate_series(1,100) i; /*+Set(enable_seqscan true) Set(yb_bnl_batch_size 3) Leading((s2 (s1 s3))) NestLoop(s1 s3)*/explain (costs off) select s3.* from s1, s2, s3 where s3.r1 = s1.r1 and s3.r2 = s2.r2 and s1.r3 = s2.r3 order by s3.r1, s3.r2; - QUERY PLAN + QUERY PLAN -------------------------------------------------------------------------------------- Sort Sort Key: s3.r1, s3.r2 @@ -682,7 +682,7 @@ INSERT INTO s3 select i,i from generate_series(1,100) i; (9 rows) /*+Set(enable_seqscan true) Set(yb_bnl_batch_size 3) Leading((s2 (s1 s3))) NestLoop(s1 s3)*/select s3.* from s1, s2, s3 where s3.r1 = s1.r1 and s3.r2 = s2.r2 and s1.r3 = s2.r3 order by s3.r1, s3.r2; - r1 | r2 + r1 | r2 ----+---- 1 | 1 2 | 2 @@ -707,7 +707,7 @@ insert into s2 values (24), (25); insert into s3 values (24), (25); explain (costs off) /*+set(yb_bnl_batch_size 3) Leading(( ( s1 s2 ) s3 )) MergeJoin(s1 s2)*/select * from s1 left outer join s2 on s1.a = s2.a left outer join s3 on s2.a = s3.a where s1.a > 20; - QUERY PLAN + QUERY PLAN ----------------------------------------------------- YB Batched Nested Loop Left Join Join Filter: (s2.a = s3.a) @@ -722,7 +722,7 @@ on s1.a = s2.a left outer join s3 on s2.a = s3.a where s1.a > 20; /*+set(yb_bnl_batch_size 3) Leading(( ( s1 s2 ) s3 )) MergeJoin(s1 s2)*/ select * from s1 left outer join s2 on s1.a = s2.a left outer join s3 on s2.a = s3.a where s1.a > 20; - a 
| a | a + a | a | a ----+----+---- 24 | 24 | 24 25 | 25 | 25 @@ -738,7 +738,7 @@ insert into test2 values (1,0, 2,0,1), (2,0, 3,0,3), (2,0,3,0,5); create table test1 (a int, pp int, b int, pp2 int, c int, primary key(a asc, pp asc, b asc, pp2 asc, c asc)); insert into test1 values (1,0,2,0,1), (1,0,2,0,2), (2,0,3,0,3), (2,0,4,0,4), (2,0,4,0,5), (2,0,4,0,6); explain (costs off) select * from test1 p1 join test2 p2 on p1.a = p2.a AND p1.b = p2.b AND p1.c = p2.c; - QUERY PLAN + QUERY PLAN ----------------------------------------------------------------------------------------------------------- YB Batched Nested Loop Join Join Filter: ((p1.a = p2.a) AND (p1.b = p2.b) AND (p1.c = p2.c)) @@ -748,7 +748,7 @@ explain (costs off) select * from test1 p1 join test2 p2 on p1.a = p2.a AND p1.b (5 rows) select * from test1 p1 join test2 p2 on p1.a = p2.a AND p1.b = p2.b AND p1.c = p2.c; - a | pp | b | pp2 | c | a | pp | b | pp2 | c + a | pp | b | pp2 | c | a | pp | b | pp2 | c ---+----+---+-----+---+---+----+---+-----+--- 1 | 0 | 2 | 0 | 1 | 1 | 0 | 2 | 0 | 1 2 | 0 | 3 | 0 | 3 | 2 | 0 | 3 | 0 | 3 @@ -916,6 +916,37 @@ create index q3_range on q3(a asc); DROP TABLE q1; DROP TABLE q2; DROP TABLE q3; +create table g1(h int, r int, primary key(h hash, r asc)); +create table g2(h int, r int, primary key(h hash, r asc)); +create table main(h1 int, h2 int, r1 int, r2 int, primary key((h1,h2) hash, r1 asc, r2 asc)); +insert into main select i/1000, (i/100) % 10, (i/10) % 10, i % 10 from generate_series(1,9999) i; +insert into g1 values (1,3), (5,7); +insert into g2 values (2,4), (6,8); +/*+Leading((g1 (g2 main))) Set(enable_hashjoin off) Set(enable_mergejoin off) Set(yb_bnl_batch_size 3) Set(enable_material off) Set(enable_seqscan on)*/ explain (costs off) select main.* from g1,g2,main where main.h1 = g1.h and main.h2 = g2.h and main.r2 = g1.r and main.r1 = g2.r; + QUERY PLAN 
+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + YB Batched Nested Loop Join + Join Filter: ((main.h1 = g1.h) AND (main.r2 = g1.r)) + -> Seq Scan on g1 + -> YB Batched Nested Loop Join + Join Filter: ((g2.h = main.h2) AND (g2.r = main.r1)) + -> Seq Scan on g2 + -> Index Scan using main_pkey on main + Index Cond: ((ROW(h2, r1) = ANY (ARRAY[ROW(g2.h, g2.r), ROW($7, $10), ROW($8, $11)])) AND (ROW(h1, r2) = ANY (ARRAY[ROW(g1.h, g1.r), ROW($1, $4), ROW($2, $5)]))) +(8 rows) + +/*+Leading((g1 (g2 main))) Set(enable_hashjoin off) Set(enable_mergejoin off) Set(yb_bnl_batch_size 3) Set(enable_material off) Set(enable_seqscan on)*/ select main.* from g1,g2,main where main.h1 = g1.h and main.h2 = g2.h and main.r2 = g1.r and main.r1 = g2.r; + h1 | h2 | r1 | r2 +----+----+----+---- + 5 | 6 | 8 | 7 + 5 | 2 | 4 | 7 + 1 | 6 | 8 | 3 + 1 | 2 | 4 | 3 +(4 rows) + +drop table g1; +drop table g2; +drop table main; /*+Set(enable_hashjoin off) Set(enable_mergejoin off) Set(yb_bnl_batch_size 3) Set(enable_seqscan on) Set(enable_material off) Leading((q3 (q2 q1)))*/EXPLAIN (COSTS OFF) SELECT c.column_name, c.is_nullable = 'YES', c.udt_name, c.character_maximum_length, c.numeric_precision, c.numeric_precision_radix, c.numeric_scale, c.datetime_precision, 8 * typlen, c.column_default, pd.description, c.identity_increment diff --git a/src/postgres/src/test/regress/sql/yb_create_index.sql b/src/postgres/src/test/regress/sql/yb_create_index.sql index 834cd6c89c66..b69e513143eb 100644 --- a/src/postgres/src/test/regress/sql/yb_create_index.sql +++ b/src/postgres/src/test/regress/sql/yb_create_index.sql @@ -375,6 +375,7 @@ CREATE INDEX ON test_method ((h2) HASH); \d test_method EXPLAIN (COSTS OFF) SELECT * FROM test_method WHERE h2 = 258; SELECT * FROM test_method WHERE h2 = 258; +DROP TABLE test_method; -- Test more HASH key cases in PRIMARY KEY CREATE TABLE 
test_hash ( diff --git a/src/postgres/src/test/regress/sql/yb_hash_in_queries.sql b/src/postgres/src/test/regress/sql/yb_hash_in_queries.sql new file mode 100644 index 000000000000..ca0aed3e0db2 --- /dev/null +++ b/src/postgres/src/test/regress/sql/yb_hash_in_queries.sql @@ -0,0 +1,154 @@ +-- Testing IN queries on hash keys +set yb_enable_hash_batch_in = false; +CREATE TABLE test_method (h1 int, a int, b int, c int, d int, e int, PRIMARY KEY (h1 HASH)); +INSERT INTO test_method VALUES (1, 1, 1, 1, 1, 11), (2, 1, 1, 2, 2, 12), (19, 1, 2, 1, 3, 13), (3, 1, 2, 2, 4, 14), (4, 2, 1, 1, 5, 15), (5, 2, 1, 2, 6, 16), (6, 2, 2, 1, 7, 17), (7, 2, 2, 2, 8, 18), (8, 2, 999, 2, 8, 18), (9, 0, 1, 1, 9, 19), (10, 1, 1, 2, 10, 20), (11, 3, 1, 1, 1, 11), (12, 3, 1, 2, 2, 12), (13, 3, 2, 1, 3, 13), (14, 3, 2, 2, 4, 14), (15, 4, 1, 1, 1, 11), (16, 4, 1, 2, 2, 12), (17, 4, 2, 1, 3, 13), (18, 4, 2, 2, 4, 14); +SELECT * FROM test_method where h1 = 1; +SELECT * FROM test_method where h1 = 1 AND a = 1; +SELECT * FROM test_method where h1 = 1 AND b = 2; +SELECT * FROM test_method where h1 = 2 AND a = 1 AND b = 1; +SELECT * FROM test_method where h1 = 1 AND a IN (1, 2, 3, 4); +SELECT * FROM test_method where h1 = 1 AND a IN (1, 2, 3, 4) AND b IN (1, 2, 3, 4); + +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 IN (1, 2, 3, 4); +SELECT * FROM test_method where h1 IN (1, 2, 3, 4); + +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND a = 1; +SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND a = 1; + +set yb_enable_hash_batch_in = true; +SELECT * FROM test_method where h1 = 1; +SELECT * FROM test_method where h1 = 1 AND a = 1; +SELECT * FROM test_method where h1 = 1 AND b = 2; +SELECT * FROM test_method where h1 = 2 AND a = 1 AND b = 1; +SELECT * FROM test_method where h1 = 1 AND a IN (1, 2, 3, 4); +SELECT * FROM test_method where h1 = 1 AND a IN (1, 2, 3, 4) AND b IN (1, 2, 3, 
4); +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 IN (1, 2, 3, 4); +SELECT * FROM test_method where h1 IN (1, 2, 3, 4); + +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND a = 1; +SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND a = 1; +DROP TABLE test_method; + +-- Testing IN queries on range keys +set yb_enable_hash_batch_in = false; +CREATE TABLE test_method (r1 int, a int, b int, c int, d int, e int, PRIMARY KEY (r1 ASC)); +INSERT INTO test_method VALUES (1, 1, 1, 1, 1, 11), (2, 1, 1, 2, 2, 12), (19, 1, 2, 1, 3, 13), (3, 1, 2, 2, 4, 14), (4, 2, 1, 1, 5, 15), (5, 2, 1, 2, 6, 16), (6, 2, 2, 1, 7, 17), (7, 2, 2, 2, 8, 18), (8, 2, 999, 2, 8, 18), (9, 0, 1, 1, 9, 19), (10, 1, 1, 2, 10, 20), (11, 3, 1, 1, 1, 11), (12, 3, 1, 2, 2, 12), (13, 3, 2, 1, 3, 13), (14, 3, 2, 2, 4, 14), (15, 4, 1, 1, 1, 11), (16, 4, 1, 2, 2, 12), (17, 4, 2, 1, 3, 13), (18, 4, 2, 2, 4, 14); +SELECT * FROM test_method where r1 = 1; +SELECT * FROM test_method where r1 = 1 AND a = 1; +SELECT * FROM test_method where r1 = 1 AND b = 2; +SELECT * FROM test_method where r1 = 2 AND a = 1 AND b = 1; +SELECT * FROM test_method where r1 = 1 AND a IN (1, 2, 3, 4); +SELECT * FROM test_method where r1 = 1 AND a IN (1, 2, 3, 4) AND b IN (1, 2, 3, 4); +SELECT * FROM test_method where r1 IN (1, 2, 3, 4); +SELECT * FROM test_method where r1 IN (1, 2, 3, 4) AND a = 1; +SELECT * FROM test_method where r1 IN (1, 2, 3, 4) AND b = 2; +SELECT * FROM test_method where r1 IN (1, 2, 3, 4) AND a = 1 AND b = 1; +SELECT * FROM test_method where r1 IN (1, 2, 3, 4) AND a IN (1, 2, 3, 4); +SELECT * FROM test_method where r1 IN (1, 2, 3, 4) AND a IN (1, 2, 3, 4) AND b IN (1, 2, 3, 4); +set yb_enable_hash_batch_in = true; +SELECT * FROM test_method where r1 = 1; +SELECT * FROM test_method where r1 = 1 AND a = 1; +SELECT * FROM test_method where r1 = 1 AND b = 2; +SELECT * FROM test_method where r1 = 2 AND a 
= 1 AND b = 1; +SELECT * FROM test_method where r1 = 1 AND a IN (1, 2, 3, 4); +SELECT * FROM test_method where r1 = 1 AND a IN (1, 2, 3, 4) AND b IN (1, 2, 3, 4); +SELECT * FROM test_method where r1 IN (1, 2, 3, 4); +SELECT * FROM test_method where r1 IN (1, 2, 3, 4) AND a = 1; +SELECT * FROM test_method where r1 IN (1, 2, 3, 4) AND b = 2; +SELECT * FROM test_method where r1 IN (1, 2, 3, 4) AND a = 1 AND b = 1; +SELECT * FROM test_method where r1 IN (1, 2, 3, 4) AND a IN (1, 2, 3, 4); +SELECT * FROM test_method where r1 IN (1, 2, 3, 4) AND a IN (1, 2, 3, 4) AND b IN (1, 2, 3, 4); +DROP TABLE test_method; + +-- Testing IN queries on multi column hash keys +CREATE TABLE test_method (h1 int, h2 int, a int, b int, v1 int, v2 int, PRIMARY KEY ((h1, h2) HASH)); +INSERT INTO test_method VALUES (1, 1, 1, 1, 1, 11), (2, 1, 1, 2, 2, 12), (19, 1, 2, 1, 3, 13), (3, 1, 2, 2, 4, 14), (4, 2, 1, 1, 5, 15), (5, 2, 1, 2, 6, 16), (6, 2, 2, 1, 7, 17), (7, 2, 2, 2, 8, 18), (8, 2, 999, 2, 8, 18), (9, 0, 1, 1, 9, 19), (10, 1, 1, 2, 10, 20), (11, 3, 1, 1, 1, 11), (12, 3, 1, 2, 2, 12), (13, 3, 2, 1, 3, 13), (14, 3, 2, 2, 4, 14), (15, 4, 1, 1, 1, 11), (16, 4, 1, 2, 2, 12), (17, 4, 2, 1, 3, 13), (18, 4, 2, 2, 4, 14); +set yb_enable_hash_batch_in = false; +SELECT * FROM test_method where h1 = 1 ; +SELECT * FROM test_method where h1 = 1 AND h2 = 2; +SELECT * FROM test_method where h1 = 1 AND h2 = 2 AND a = 1; +SELECT * FROM test_method where h1 = 1 AND h2 = 2 AND a = 1 AND b = 2; +SELECT * FROM test_method where h1 = 1 AND h2 = 1 AND a IN (1, 2, 3, 4) AND b IN (1, 2, 3, 4); +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2); +SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2); + +set yb_enable_hash_batch_in = true; +SELECT * FROM test_method where h1 = 1 ; +SELECT * FROM test_method where h1 = 1 AND h2 = 2; +SELECT * FROM test_method where h1 = 1 AND h2 = 2 AND a = 1; +SELECT * FROM test_method where 
h1 = 1 AND h2 = 2 AND a = 1 AND b = 2; +SELECT * FROM test_method where h1 = 1 AND h2 = 1 AND a IN (1, 2, 3, 4) AND b IN (1, 2, 3, 4); +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2); +SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2); +DROP TABLE test_method; + +-- Testing IN queries on multi column range keys +CREATE TABLE test_method (r1 int, r2 int, a int, b int, v1 int, v2 int, PRIMARY KEY (r1 ASC, r2 ASC)); +INSERT INTO test_method VALUES (1, 1, 1, 1, 1, 11), (2, 1, 1, 2, 2, 12), (19, 1, 2, 1, 3, 13), (3, 1, 2, 2, 4, 14), (4, 2, 1, 1, 5, 15), (5, 2, 1, 2, 6, 16), (6, 2, 2, 1, 7, 17), (7, 2, 2, 2, 8, 18), (8, 2, 999, 2, 8, 18), (9, 0, 1, 1, 9, 19), (10, 1, 1, 2, 10, 20), (11, 3, 1, 1, 1, 11), (12, 3, 1, 2, 2, 12), (13, 3, 2, 1, 3, 13), (14, 3, 2, 2, 4, 14), (15, 4, 1, 1, 1, 11), (16, 4, 1, 2, 2, 12), (17, 4, 2, 1, 3, 13), (18, 4, 2, 2, 4, 14); +set yb_enable_hash_batch_in = false; +SELECT * FROM test_method where r1 = 1 ; +SELECT * FROM test_method where r1 = 1 AND r2 = 2; +SELECT * FROM test_method where r1 = 1 AND r2 = 2 AND a = 1; +SELECT * FROM test_method where r1 = 1 AND r2 = 2 AND a = 1 AND b = 2; +SELECT * FROM test_method where r1 = 1 AND r2 = 1 AND a IN (1, 2, 3, 4) AND b IN (1, 2, 3, 4); +SELECT * FROM test_method where r1 IN (1, 2, 3, 4) AND r2 IN (1, 2); +SELECT * FROM test_method where r1 IN (1, 2, 3, 4) AND r2 IN (1, 2) AND a = 1; +SELECT * FROM test_method where r1 IN (1, 2, 3, 4) AND r2 IN (1, 2) AND a = 1 AND b = 2; +SELECT * FROM test_method where r1 IN (1, 2, 3, 4) AND r2 IN (1, 2) AND a IN (1, 2, 3, 4) AND b IN (1, 2, 3, 4); +set yb_enable_hash_batch_in = true; +SELECT * FROM test_method where r1 = 1 ; +SELECT * FROM test_method where r1 = 1 AND r2 = 2; +SELECT * FROM test_method where r1 = 1 AND r2 = 2 AND a = 1; +SELECT * FROM test_method where r1 = 1 AND r2 = 2 AND a = 1 AND b = 2; +SELECT * FROM test_method where r1 = 1 AND r2 = 1 AND a 
IN (1, 2, 3, 4) AND b IN (1, 2, 3, 4); +SELECT * FROM test_method where r1 IN (1, 2, 3, 4) AND r2 IN (1, 2); +SELECT * FROM test_method where r1 IN (1, 2, 3, 4) AND r2 IN (1, 2) AND a = 1; +SELECT * FROM test_method where r1 IN (1, 2, 3, 4) AND r2 IN (1, 2) AND a = 1 AND b = 2; +SELECT * FROM test_method where r1 IN (1, 2, 3, 4) AND r2 IN (1, 2) AND a IN (1, 2, 3, 4) AND b IN (1, 2, 3, 4); +DROP TABLE test_method; + + +-- Testing IN queries on multi column hash and range keys +CREATE TABLE test_method (h1 int, h2 int, r1 int, r2 int, v1 int, v2 int, PRIMARY KEY ((h1, h2) HASH, r1, r2)); +INSERT INTO test_method VALUES (1, 1, 1, 1, 1, 11), (1, 1, 1, 2, 2, 12), (1, 1, 2, 1, 3, 13), (1, 1, 2, 2, 4, 14), (1, 2, 1, 1, 5, 15), (1, 2, 1, 2, 6, 16), (1, 2, 2, 1, 7, 17), (1, 2, 2, 2, 8, 18), (1, 2, 999, 2, 8, 18), (2, 0, 1, 1, 9, 19), (2, 1, 1, 2, 10, 20), (1, 3, 1, 1, 1, 11), (1, 3, 1, 2, 2, 12), (1, 3, 2, 1, 3, 13), (1, 3, 2, 2, 4, 14), (1, 4, 1, 1, 1, 11), (1, 4, 1, 2, 2, 12), (1, 4, 2, 1, 3, 13), (1, 4, 2, 2, 4, 14); + +set yb_enable_hash_batch_in = false; +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 = 1 ; +SELECT * FROM test_method where h1 = 1 ; +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 = 1 AND h2 = 2; +SELECT * FROM test_method where h1 = 1 AND h2 = 2; +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 = 1 AND h2 = 2 AND r1 = 1; +SELECT * FROM test_method where h1 = 1 AND h2 = 2 AND r1 = 1; +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 = 1 AND h2 = 2 AND r1 = 1 AND r2 = 2; +SELECT * FROM test_method where h1 = 1 AND h2 = 2 AND r1 = 1 AND r2 = 2; +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 = 1 AND h2 = 1 AND r1 IN (1, 2, 3, 4) AND r2 IN (1, 2, 3, 4); +SELECT * FROM test_method where h1 = 1 AND h2 = 1 AND r1 IN (1, 2, 3, 
4) AND r2 IN (1, 2, 3, 4); +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2); +SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2); +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2) AND r1 = 1; +SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2) AND r1 = 1; +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2) AND r1 = 1 AND r2 = 2; +SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2) AND r1 = 1 AND r2 = 2; +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2) AND r1 IN (1, 2, 3, 4) AND r2 IN (1, 2, 3, 4); +SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2) AND r1 IN (1, 2, 3, 4) AND r2 IN (1, 2, 3, 4); + +set yb_enable_hash_batch_in = true; +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 = 1 ; +SELECT * FROM test_method where h1 = 1 ; +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 = 1 AND h2 = 2; +SELECT * FROM test_method where h1 = 1 AND h2 = 2; +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 = 1 AND h2 = 2 AND r1 = 1; +SELECT * FROM test_method where h1 = 1 AND h2 = 2 AND r1 = 1; +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 = 1 AND h2 = 2 AND r1 = 1 AND r2 = 2; +SELECT * FROM test_method where h1 = 1 AND h2 = 2 AND r1 = 1 AND r2 = 2; +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 = 1 AND h2 = 1 AND r1 IN (1, 2, 3, 4) AND r2 IN (1, 2, 3, 4); +SELECT * FROM test_method where h1 = 1 AND h2 = 1 AND r1 IN (1, 2, 3, 4) AND r2 IN (1, 2, 3, 4); +EXPLAIN 
(ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2); +SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2); +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2) AND r1 = 1; +SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2) AND r1 = 1; +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2) AND r1 = 1 AND r2 = 2; +SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2) AND r1 = 1 AND r2 = 2; +EXPLAIN (ANALYZE, DIST, COSTS OFF, SUMMARY OFF, TIMING OFF) SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2) AND r1 IN (1, 2, 3, 4) AND r2 IN (1, 2, 3, 4); +SELECT * FROM test_method where h1 IN (1, 2, 3, 4) AND h2 IN (1, 2) AND r1 IN (1, 2, 3, 4) AND r2 IN (1, 2, 3, 4); +DROP TABLE test_method; diff --git a/src/postgres/src/test/regress/sql/yb_join_batching.sql b/src/postgres/src/test/regress/sql/yb_join_batching.sql index 3c34c0a43390..12831e4ee367 100644 --- a/src/postgres/src/test/regress/sql/yb_join_batching.sql +++ b/src/postgres/src/test/regress/sql/yb_join_batching.sql @@ -239,6 +239,21 @@ DROP TABLE q1; DROP TABLE q2; DROP TABLE q3; +create table g1(h int, r int, primary key(h hash, r asc)); +create table g2(h int, r int, primary key(h hash, r asc)); +create table main(h1 int, h2 int, r1 int, r2 int, primary key((h1,h2) hash, r1 asc, r2 asc)); +insert into main select i/1000, (i/100) % 10, (i/10) % 10, i % 10 from generate_series(1,9999) i; +insert into g1 values (1,3), (5,7); +insert into g2 values (2,4), (6,8); + +/*+Leading((g1 (g2 main))) Set(enable_hashjoin off) Set(enable_mergejoin off) Set(yb_bnl_batch_size 3) Set(enable_material off) Set(enable_seqscan on)*/ explain (costs off) select main.* from g1,g2,main where main.h1 = g1.h and main.h2 = g2.h and main.r2 = g1.r and main.r1 = g2.r; 
+ +/*+Leading((g1 (g2 main))) Set(enable_hashjoin off) Set(enable_mergejoin off) Set(yb_bnl_batch_size 3) Set(enable_material off) Set(enable_seqscan on)*/ select main.* from g1,g2,main where main.h1 = g1.h and main.h2 = g2.h and main.r2 = g1.r and main.r1 = g2.r; + +drop table g1; +drop table g2; +drop table main; + /*+Set(enable_hashjoin off) Set(enable_mergejoin off) Set(yb_bnl_batch_size 3) Set(enable_seqscan on) Set(enable_material off) Leading((q3 (q2 q1)))*/EXPLAIN (COSTS OFF) SELECT c.column_name, c.is_nullable = 'YES', c.udt_name, c.character_maximum_length, c.numeric_precision, c.numeric_precision_radix, c.numeric_scale, c.datetime_precision, 8 * typlen, c.column_default, pd.description, c.identity_increment diff --git a/src/postgres/src/test/regress/yb_hash_in_schedule b/src/postgres/src/test/regress/yb_hash_in_schedule new file mode 100644 index 000000000000..65a2d978eeac --- /dev/null +++ b/src/postgres/src/test/regress/yb_hash_in_schedule @@ -0,0 +1,8 @@ +# src/test/regress/yb_hash_in_schedule +# +#################################################################################################### +# This testsuite includes tests on IN conditions on hash keys. These tests rely on +# the YsqlRequestLimit flag being set to 1. +# The tests in this schedule are not ported from PostgreSQL original tests +#################################################################################################### +test: yb_hash_in_queries diff --git a/src/yb/common/ql_scanspec.cc b/src/yb/common/ql_scanspec.cc index e43583689e50..f53b60f06c5f 100644 --- a/src/yb/common/ql_scanspec.cc +++ b/src/yb/common/ql_scanspec.cc @@ -103,50 +103,39 @@ auto GetColumnValue(const Col& col) { template void QLScanRange::Init(const Cond& condition) { - // If there is no range column, return. - if (schema_.num_range_key_columns() == 0) { - return; - } // Initialize the lower/upper bounds of each range column to null to mean it is unbounded. 
- ranges_.reserve(schema_.num_range_key_columns()); + ranges_.reserve(schema_.num_dockey_components()); + if (schema_.has_yb_hash_code()) { + ranges_.emplace(kYbHashCodeColId, QLRange()); + } + for (size_t i = 0; i < schema_.num_key_columns(); i++) { - if (schema_.is_range_column(i)) { - ranges_.emplace(schema_.column_id(i), QLRange()); - } + ranges_.emplace(schema_.column_id(i), QLRange()); } - // Check if there is a range column referenced in the operands. + // Check if there are range and hash columns referenced in the operands. const auto& operands = condition.operands(); bool has_range_column = false; + bool has_hash_column = false; using ExprCase = decltype(operands.begin()->expr_case()); - std::vector column_ids; for (const auto& operand : operands) { - column_ids.clear(); if (operand.expr_case() == ExprCase::kColumnId) { - column_ids.push_back(operand.column_id()); + auto id = operand.column_id(); + has_range_column |= schema_.is_range_column(ColumnId(id)); + has_hash_column |= (id == kYbHashCodeColId || schema_.is_hash_key_column(ColumnId(id))); + } else if (operand.expr_case() == ExprCase::kTuple) { - column_ids.reserve(operand.tuple().elems().size()); for (auto const& elem : operand.tuple().elems()) { DCHECK(elem.has_column_id()); - column_ids.push_back(elem.column_id()); - } - } - if (column_ids.empty()) { - continue; - } + auto id = elem.column_id(); - // For operand.expr_case() == ExprCase::kColumnId, there will be just one column and - // for operand.expr_case() == ExprCase::kTuple, all the columns are given to be range columns, - // so in order to set has_range_column as true it suffices to find a single range column. 
- for (auto id : column_ids) { - if (schema_.is_range_column(ColumnId(id))) { - has_range_column = true; - break; + has_range_column |= schema_.is_range_column(ColumnId(id)); + has_hash_column |= (id == kYbHashCodeColId || schema_.is_hash_key_column(ColumnId(id))); } } - if (has_range_column) { + if (has_range_column && has_hash_column) { break; } } @@ -336,6 +325,20 @@ void QLScanRange::Init(const Cond& condition) { has_in_range_options_ = true; } } + + // Check if there are hash columns as a part of IN options + if(has_hash_column) { + auto column_value = GetColumnValue(operands); + if (column_value.column_ids.size() > 1) { + for (const auto& col_id : column_value.column_ids) { + if (col_id.ToUint64() == kYbHashCodeColId || + schema_.is_hash_key_column(col_id)) { + has_in_hash_options_ = true; + break; + } + } + } + } return; } @@ -454,6 +457,7 @@ QLScanRange& QLScanRange::operator&=(const QLScanRange& other) { } } has_in_range_options_ = has_in_range_options_ || other.has_in_range_options_; + has_in_hash_options_ = has_in_hash_options_ || other.has_in_hash_options_; return *this; } @@ -479,6 +483,7 @@ QLScanRange& QLScanRange::operator|=(const QLScanRange& other) { } } has_in_range_options_ = has_in_range_options_ && other.has_in_range_options_; + has_in_hash_options_ = has_in_hash_options_ && other.has_in_hash_options_; return *this; } @@ -523,18 +528,19 @@ QLScanRange& QLScanRange::operator=(QLScanRange&& other) { //-------------------------------------- QL scan spec --------------------------------------- -QLScanSpec::QLScanSpec(QLExprExecutorPtr executor) - : QLScanSpec(nullptr, nullptr, true, std::move(executor)) { +QLScanSpec::QLScanSpec(const Schema& schema, + QLExprExecutorPtr executor) + : QLScanSpec(schema, nullptr, nullptr, true, std::move(executor)) { } -QLScanSpec::QLScanSpec(const QLConditionPB* condition, +QLScanSpec::QLScanSpec(const Schema& schema, + const QLConditionPB* condition, const QLConditionPB* if_condition, const bool is_forward_scan, 
QLExprExecutorPtr executor) - : YQLScanSpec(YQL_CLIENT_CQL), + : YQLScanSpec(YQL_CLIENT_CQL, schema, is_forward_scan), condition_(condition), if_condition_(if_condition), - is_forward_scan_(is_forward_scan), executor_(std::move(executor)) { if (executor_ == nullptr) { executor_ = std::make_shared(); @@ -557,9 +563,11 @@ Status QLScanSpec::Match(const QLTableRow& table_row, bool* match) const { //-------------------------------------- QL scan spec --------------------------------------- // Pgsql scan specification. -PgsqlScanSpec::PgsqlScanSpec(const PgsqlExpressionPB *where_expr, +PgsqlScanSpec::PgsqlScanSpec(const Schema& schema, + bool is_forward_scan, + const PgsqlExpressionPB *where_expr, QLExprExecutor::SharedPtr executor) - : YQLScanSpec(YQL_CLIENT_PGSQL), + : YQLScanSpec(YQL_CLIENT_PGSQL, schema, is_forward_scan), where_expr_(where_expr), executor_(executor) { if (executor_ == nullptr) { diff --git a/src/yb/common/ql_scanspec.h b/src/yb/common/ql_scanspec.h index 76d5fd9f5612..f8195626c5c4 100644 --- a/src/yb/common/ql_scanspec.h +++ b/src/yb/common/ql_scanspec.h @@ -23,6 +23,7 @@ #include "yb/common/common_fwd.h" #include "yb/common/column_id.h" #include "yb/common/common_types.pb.h" +#include "yb/common/schema.h" #include "yb/common/value.pb.h" namespace yb { @@ -33,7 +34,8 @@ namespace yb { //-------------------------------------------------------------------------------------------------- class YQLScanSpec { public: - explicit YQLScanSpec(QLClient client_type) : client_type_(client_type) { + YQLScanSpec(QLClient client_type, const Schema& schema, bool is_forward_scan) + : schema_(schema), is_forward_scan_(is_forward_scan), client_type_(client_type) { } virtual ~YQLScanSpec() { @@ -43,7 +45,30 @@ class YQLScanSpec { return client_type_; } + // for a particular column, give a column id, this function tries to determine its sorting type + SortingType get_sorting_type(size_t col_idx) { + return col_idx == kYbHashCodeColId || 
schema().is_hash_key_column(col_idx) ? + SortingType::kAscending : schema().column(col_idx).sorting_type(); + } + + // for a particular column, given a column id, this function tries to determine if it is + // a forward scan or a reverse scan + bool get_scan_direction(size_t col_idx) { + auto sorting_type = get_sorting_type(col_idx); + return is_forward_scan_ ^ (sorting_type == SortingType::kAscending || + sorting_type == SortingType::kAscendingNullsLast); + } + + bool is_forward_scan() const { + return is_forward_scan_; + } + + const Schema& schema() const { return schema_; } + + private: + const Schema& schema_; + const bool is_forward_scan_; const QLClient client_type_; }; @@ -62,15 +87,15 @@ class QLScanRange { class QLBound { public: - const QLValuePB &GetValue() const { return value_; } + const QLValuePB& GetValue() const { return value_; } bool IsInclusive() const { return is_inclusive_; } - bool operator<(const QLBound &other) const; - bool operator>(const QLBound &other) const; - bool operator==(const QLBound &other) const; + bool operator<(const QLBound& other) const; + bool operator>(const QLBound& other) const; + bool operator==(const QLBound& other) const; protected: - QLBound(const QLValuePB &value, bool is_inclusive, bool is_lower_bound); - QLBound(const LWQLValuePB &value, bool is_inclusive, bool is_lower_bound); + QLBound(const QLValuePB& value, bool is_inclusive, bool is_lower_bound); + QLBound(const LWQLValuePB& value, bool is_inclusive, bool is_lower_bound); QLValuePB value_; bool is_inclusive_ = true; @@ -80,20 +105,20 @@ class QLScanRange { // Upper bound class class QLUpperBound : public QLBound { public: - QLUpperBound(const QLValuePB &value, bool is_inclusive) + QLUpperBound(const QLValuePB& value, bool is_inclusive) : QLBound(value, is_inclusive, false) {} - QLUpperBound(const LWQLValuePB &value, bool is_inclusive) + QLUpperBound(const LWQLValuePB& value, bool is_inclusive) : QLBound(value, is_inclusive, false) {} }; // Lower bound class 
class QLLowerBound : public QLBound { public: - QLLowerBound(const QLValuePB &value, bool is_inclusive) + QLLowerBound(const QLValuePB& value, bool is_inclusive) : QLBound(value, is_inclusive, true) {} - QLLowerBound(const LWQLValuePB &value, bool is_inclusive) + QLLowerBound(const LWQLValuePB& value, bool is_inclusive) : QLBound(value, is_inclusive, true) {} }; @@ -114,12 +139,16 @@ class QLScanRange { std::vector GetColIds() const { std::vector col_id_list; - for (auto &it : ranges_) { + for (auto& it : ranges_) { col_id_list.push_back(it.first); } return col_id_list; } + bool has_in_hash_options() const { + return has_in_hash_options_; + } + bool has_in_range_options() const { return has_in_range_options_; } @@ -144,16 +173,19 @@ class QLScanRange { // Whether the condition has an IN condition on a range (clustering) column. // Used in doc_ql_scanspec to try to construct the set of options for a multi-point scan. bool has_in_range_options_ = false; + bool has_in_hash_options_ = false; }; // A scan specification for a QL scan. It may be used to scan either a specified doc key // or a hash key + optional WHERE condition clause. class QLScanSpec : public YQLScanSpec { public: - explicit QLScanSpec(QLExprExecutorPtr executor = nullptr); + explicit QLScanSpec(const Schema& schema, + QLExprExecutorPtr executor = nullptr); // Scan for the given hash key and a condition. - QLScanSpec(const QLConditionPB* condition, + QLScanSpec(const Schema& schema, + const QLConditionPB* condition, const QLConditionPB* if_condition, const bool is_forward_scan, QLExprExecutorPtr executor = nullptr); @@ -164,17 +196,9 @@ class QLScanSpec : public YQLScanSpec { // virtual to make the class polymorphic. virtual Status Match(const QLTableRow& table_row, bool* match) const; - bool is_forward_scan() const { - return is_forward_scan_; - } - - // Get Schema if available. 
- virtual const Schema* schema() const { return nullptr; } - protected: const QLConditionPB* condition_; const QLConditionPB* if_condition_; - const bool is_forward_scan_; QLExprExecutorPtr executor_; }; @@ -185,17 +209,19 @@ class PgsqlScanSpec : public YQLScanSpec { public: typedef std::unique_ptr UniPtr; - explicit PgsqlScanSpec(const PgsqlExpressionPB *where_expr, + explicit PgsqlScanSpec(const Schema& schema, + const bool is_forward_scan, + const PgsqlExpressionPB* where_expr, QLExprExecutorPtr executor = nullptr); virtual ~PgsqlScanSpec(); - const PgsqlExpressionPB *where_expr() { + const PgsqlExpressionPB* where_expr() { return where_expr_; } protected: - const PgsqlExpressionPB *where_expr_; + const PgsqlExpressionPB* where_expr_; QLExprExecutorPtr executor_; }; diff --git a/src/yb/common/ql_value.cc b/src/yb/common/ql_value.cc index 2914bea63cee..37214f1cc85a 100644 --- a/src/yb/common/ql_value.cc +++ b/src/yb/common/ql_value.cc @@ -948,7 +948,7 @@ bool BothNull(const QLValuePB& lhs, const QLValue& rhs) { std::vector GetTuplesSortedByOrdering( const QLSeqValuePB& options, const Schema& schema, bool is_forward_scan, - const std::vector& col_idxs) { + const ColumnListVector& col_idxs) { std::vector options_elems; options_elems.reserve(options.elems_size()); for (const auto& value : options.elems()) { @@ -986,9 +986,12 @@ std::vector GetTuplesSortedByOrdering( } if (i != tuple1.elems().size()) { - auto sorting_type = schema.column(col_idxs[i]).sorting_type(); + auto sorting_type = + col_idxs[i] == kYbHashCodeColId ? 
SortingType::kAscending + : schema.column(col_idxs[i]).sorting_type(); auto is_reverse_order = is_forward_scan ^ (sorting_type == SortingType::kAscending || - sorting_type == kAscendingNullsLast); + sorting_type == kAscendingNullsLast || + sorting_type == kNotSpecified); cmp ^= is_reverse_order; } return cmp; diff --git a/src/yb/common/ql_value.h b/src/yb/common/ql_value.h index 17ce91f0b8f7..2fcdbc162450 100644 --- a/src/yb/common/ql_value.h +++ b/src/yb/common/ql_value.h @@ -44,6 +44,8 @@ namespace yb { +using ColumnListVector = std::vector; + //-------------------------------------------------------------------------------------------------- void AppendToKey(const QLValuePB &value_pb, std::string *bytes); void AppendToKey(const LWQLValuePB &value_pb, std::string *bytes); @@ -600,7 +602,7 @@ void ConcatStrings(const Slice& lhs, const Slice& rhs, LWQLValuePB* result); std::vector GetTuplesSortedByOrdering( const QLSeqValuePB& options, const Schema& schema, bool is_forward_scan, - const std::vector& col_idxs); + const ColumnListVector& col_idxs); #define YB_SET_INT_VALUE(ql_valuepb, input, bits) \ case DataType::BOOST_PP_CAT(INT, bits): { \ diff --git a/src/yb/common/schema.h b/src/yb/common/schema.h index 8631753c996e..c8fbe1b0375c 100644 --- a/src/yb/common/schema.h +++ b/src/yb/common/schema.h @@ -78,6 +78,7 @@ namespace yb { class DeletedColumnPB; static const int kNoDefaultTtl = -1; +static const int kYbHashCodeColId = std::numeric_limits::max() - 1; // Struct for storing information about deleted columns for cleanup. struct DeletedColumn { @@ -797,6 +798,18 @@ class Schema { UpdateDocKeyOffsets(); } + bool has_yb_hash_code() const { + return num_hash_key_columns() > 0; + } + + size_t num_dockey_components() const { + return num_key_columns() + has_yb_hash_code(); + } + + size_t get_dockey_component_idx(size_t col_idx) const { + return col_idx == kYbHashCodeColId ? 
0 : col_idx + has_yb_hash_code(); + } + // Gets the colocation ID of the non-primary table this schema belongs to in a // tablet with colocated tables. ColocationId colocation_id() const { diff --git a/src/yb/common/ybc_util.cc b/src/yb/common/ybc_util.cc index 2e57c46ff6ae..0cb0291ce216 100644 --- a/src/yb/common/ybc_util.cc +++ b/src/yb/common/ybc_util.cc @@ -41,6 +41,8 @@ DEFINE_test_flag(string, process_info_dir, string(), bool yb_debug_log_docdb_requests = false; +bool yb_enable_hash_batch_in = true; + bool yb_non_ddl_txn_for_sys_tables_allowed = false; bool yb_format_funcs_include_yb_metadata = false; diff --git a/src/yb/common/ybc_util.h b/src/yb/common/ybc_util.h index 2f178df974fa..99e90644b888 100644 --- a/src/yb/common/ybc_util.h +++ b/src/yb/common/ybc_util.h @@ -81,6 +81,12 @@ extern bool yb_binary_restore; */ extern bool yb_run_with_explain_analyze; +/* + * GUC variable that enables batching RPCs of generated for IN queries + * on hash keys issued to the same tablets. + */ +extern bool yb_enable_hash_batch_in; + /* * xcluster consistency level */ diff --git a/src/yb/docdb/cql_operation.cc b/src/yb/docdb/cql_operation.cc index 4544f4d96ce3..82a36697d372 100644 --- a/src/yb/docdb/cql_operation.cc +++ b/src/yb/docdb/cql_operation.cc @@ -1870,7 +1870,7 @@ Status QLReadOperation::PopulateResultSet(const std::unique_ptr& spe int rscol_index = 0; for (const QLExpressionPB& expr : request_.selected_exprs()) { QLExprResult value; - RETURN_NOT_OK(EvalExpr(expr, table_row, value.Writer(), spec->schema())); + RETURN_NOT_OK(EvalExpr(expr, table_row, value.Writer(), &spec->schema())); resultset->AppendColumn(rscol_index, value.Value()); rscol_index++; } diff --git a/src/yb/docdb/doc_key.cc b/src/yb/docdb/doc_key.cc index e4d205a76ac6..0c3219f49cfe 100644 --- a/src/yb/docdb/doc_key.cc +++ b/src/yb/docdb/doc_key.cc @@ -1283,15 +1283,9 @@ Result DocKeyDecoder::HasPrimitiveValue(AllowSpecial allow_special) { return docdb::HasPrimitiveValue(&input_, allow_special); } 
-Status DocKeyDecoder::DecodeToRangeGroup() { +Status DocKeyDecoder::DecodeToKeys() { RETURN_NOT_OK(DecodeCotableId()); RETURN_NOT_OK(DecodeColocationId()); - if (VERIFY_RESULT(DecodeHashCode())) { - while (VERIFY_RESULT(HasPrimitiveValue())) { - RETURN_NOT_OK(DecodeKeyEntryValue()); - } - } - return Status::OK(); } diff --git a/src/yb/docdb/doc_key.h b/src/yb/docdb/doc_key.h index d98bfa0ddc02..5d5ee630408b 100644 --- a/src/yb/docdb/doc_key.h +++ b/src/yb/docdb/doc_key.h @@ -396,6 +396,7 @@ class DocKeyDecoder { Result DecodeCotableId(Uuid* uuid = nullptr); Result DecodeColocationId(ColocationId* colocation_id = nullptr); + Status DecodeToKeys(); Result HasPrimitiveValue(AllowSpecial allow_special = AllowSpecial::kFalse); Result DecodeHashCode( @@ -424,8 +425,6 @@ class DocKeyDecoder { return &input_; } - Status DecodeToRangeGroup(); - private: Slice input_; }; diff --git a/src/yb/docdb/doc_pgsql_scanspec.cc b/src/yb/docdb/doc_pgsql_scanspec.cc index cc3da52afafe..bab71bd6e915 100644 --- a/src/yb/docdb/doc_pgsql_scanspec.cc +++ b/src/yb/docdb/doc_pgsql_scanspec.cc @@ -38,17 +38,15 @@ DocPgsqlScanSpec::DocPgsqlScanSpec(const Schema& schema, const DocKey& start_doc_key, bool is_forward_scan, const size_t prefix_length) - : PgsqlScanSpec(nullptr), - schema_(schema), + : PgsqlScanSpec(schema, is_forward_scan, nullptr), query_id_(query_id), hashed_components_(nullptr), range_components_(nullptr), - range_options_groups_(schema_.num_range_key_columns()), + options_groups_(schema.num_dockey_components()), hash_code_(hash_code), max_hash_code_(max_hash_code), start_doc_key_(start_doc_key.empty() ? KeyBytes() : start_doc_key.Encode()), lower_doc_key_(doc_key.Encode()), - is_forward_scan_(is_forward_scan), prefix_length_(prefix_length) { // Compute lower and upper doc_key. 
@@ -91,31 +89,19 @@ DocPgsqlScanSpec::DocPgsqlScanSpec( const DocKey& lower_doc_key, const DocKey& upper_doc_key, const size_t prefix_length) - : PgsqlScanSpec(where_expr), + : PgsqlScanSpec(schema, is_forward_scan, where_expr), range_bounds_(condition ? new QLScanRange(schema, *condition) : nullptr), - schema_(schema), query_id_(query_id), hashed_components_(&hashed_components.get()), range_components_(&range_components.get()), - range_options_groups_(schema_.num_range_key_columns()), + options_groups_(schema.num_dockey_components()), hash_code_(hash_code), max_hash_code_(max_hash_code), start_doc_key_(start_doc_key.empty() ? KeyBytes() : start_doc_key.Encode()), lower_doc_key_(lower_doc_key.Encode()), upper_doc_key_(upper_doc_key.Encode()), - is_forward_scan_(is_forward_scan), prefix_length_(prefix_length) { - auto lower_bound_key = bound_key(schema, true); - lower_doc_key_ = lower_bound_key > lower_doc_key_ - || lower_doc_key.empty() - ? lower_bound_key : lower_doc_key_; - - auto upper_bound_key = bound_key(schema, false); - upper_doc_key_ = upper_bound_key < upper_doc_key_ - || upper_doc_key.empty() - ? upper_bound_key : upper_doc_key_; - if (where_expr_) { // Should never get here until WHERE clause is supported. LOG(FATAL) << "DEVELOPERS: Add support for condition (where clause)"; @@ -125,24 +111,58 @@ DocPgsqlScanSpec::DocPgsqlScanSpec( range_bounds_indexes_ = range_bounds_->GetColIds(); } - // If the hash key is fixed and we have range columns with IN condition, try to construct the - // exact list of range options to scan for. 
- if ((!hashed_components_->empty() || schema_.num_hash_key_columns() == 0) && - schema_.num_range_key_columns() > 0 && - range_bounds_ && range_bounds_->has_in_range_options()) { + if (!hashed_components_->empty() && schema.num_hash_key_columns() > 0) { + options_ = std::make_shared>(schema.num_dockey_components()); + options_col_ids_.reserve(schema.num_dockey_components()); + + // should come here if we are not batching hash keys as a part of IN condition + options_groups_.BeginNewGroup(); + + // dockeys contains elements in the format yb_hash_code, hk1, hk2, ... hkn followed by + // rk1, rk2... rkn etc. As yb_hash_code is the first element and is not part of the schema + // we add it manually. + options_groups_.AddToLatestGroup(0); + options_col_ids_.emplace_back(ColumnId(kYbHashCodeColId)); + + (*options_)[0].push_back(KeyEntryValue::UInt16Hash(hash_code_.value())); + DCHECK(hashed_components_->size() == schema.num_hash_key_columns()); + for (size_t col_idx = 0; col_idx < schema.num_hash_key_columns(); ++col_idx) { + // Adding 1 to col_idx to account for hash_code column + options_groups_.AddToLatestGroup(schema.get_dockey_component_idx(col_idx)); + options_col_ids_.emplace_back(schema.column_id(col_idx)); + + (*options_)[schema.get_dockey_component_idx(col_idx)] + .push_back(std::move((*hashed_components_)[col_idx])); + } + } + + // We have hash or range columns with IN condition, try to construct the exact list of options to + // scan for. + if (range_bounds_ && + (range_bounds_->has_in_range_options() || range_bounds_->has_in_hash_options())) { DCHECK(condition); - range_options_ = std::make_shared>(schema_.num_range_key_columns()); - InitRangeOptions(*condition); + if (options_ == nullptr) + options_ = std::make_shared>(schema.num_dockey_components()); + InitOptions(*condition); } + + auto lower_bound_key = bound_key(schema, true); + lower_doc_key_ = lower_bound_key > lower_doc_key_ + || lower_doc_key.empty() + ? 
lower_bound_key : lower_doc_key_; + + auto upper_bound_key = bound_key(schema, false); + upper_doc_key_ = upper_bound_key < upper_doc_key_ + || upper_doc_key.empty() + ? upper_bound_key : upper_doc_key_; } -void DocPgsqlScanSpec::InitRangeOptions(const PgsqlConditionPB& condition) { - size_t num_hash_cols = schema_.num_hash_key_columns(); +void DocPgsqlScanSpec::InitOptions(const PgsqlConditionPB& condition) { switch (condition.op()) { case QLOperator::QL_OP_AND: for (const auto& operand : condition.operands()) { DCHECK(operand.has_condition()); - InitRangeOptions(operand.condition()); + InitOptions(operand.condition()); } break; @@ -150,6 +170,8 @@ void DocPgsqlScanSpec::InitRangeOptions(const PgsqlConditionPB& condition) { case QLOperator::QL_OP_IN: { DCHECK_EQ(condition.operands_size(), 2); // Skip any condition where LHS is not a column (e.g. subscript columns: 'map[k] = v') + // operands(0) always contains the column id. + // operands(1) contains the corresponding value or a list values. const auto& lhs = condition.operands(0); const auto& rhs = condition.operands(1); if (lhs.expr_case() != PgsqlExpressionPB::kColumnId && @@ -165,71 +187,81 @@ void DocPgsqlScanSpec::InitRangeOptions(const PgsqlConditionPB& condition) { DCHECK(condition.op() == QL_OP_IN || condition.op() == QL_OP_EQUAL); // move this up - if (lhs.has_column_id()) { - size_t col_idx = schema_.find_column_by_id(ColumnId(lhs.column_id())); + auto col_id = ColumnId(lhs.column_id()); + auto col_idx = schema().find_column_by_id(col_id); - // Skip any non-range columns. - if (!schema_.is_range_column(col_idx)) { - return; - } + // Skip any non-range columns. Hashed columns should always be sent as tuples along with + // their yb_hash_code. Hence, for hashed columns lhs should never be a column id. 
+ DCHECK(schema().is_range_column(col_idx)); - SortingType sortingType = schema_.column(col_idx).sorting_type(); - range_options_indexes_.emplace_back(condition.operands(0).column_id()); + auto sortingType = get_sorting_type(col_idx); + + // Adding the offset if yb_hash_code is present after schema usages. Schema does not know + // about yb_hash_code_column + auto key_idx = schema().get_dockey_component_idx(col_idx); + + options_col_ids_.emplace_back(col_id); + options_groups_.BeginNewGroup(); + options_groups_.AddToLatestGroup(key_idx); - range_options_groups_.BeginNewGroup(); - range_options_groups_.AddToLatestGroup(col_idx - num_hash_cols); if (condition.op() == QL_OP_EQUAL) { - auto pv = KeyEntryValue::FromQLValuePBForKey(rhs.value(), sortingType); - (*range_options_)[col_idx - num_hash_cols].push_back(pv); - } else { + auto pv = KeyEntryValue::FromQLValuePBForKey(condition.operands(1).value(), sortingType); + (*options_)[key_idx].push_back(std::move(pv)); + } else { // QL_OP_IN + DCHECK_EQ(condition.op(), QL_OP_IN); DCHECK(rhs.value().has_list_value()); const auto &options = rhs.value().list_value(); int opt_size = options.elems_size(); - (*range_options_)[col_idx - num_hash_cols].reserve(opt_size); + (*options_)[key_idx].reserve(opt_size); // IN arguments should have been de-duplicated and ordered ascendingly by the executor. - bool is_reverse_order = is_forward_scan_ ^ (sortingType == SortingType::kAscending || - sortingType == SortingType::kAscendingNullsLast); + bool is_reverse_order = get_scan_direction(col_idx); for (int i = 0; i < opt_size; i++) { int elem_idx = is_reverse_order ? opt_size - i - 1 : i; const auto &elem = options.elems(elem_idx); auto pv = KeyEntryValue::FromQLValuePBForKey(elem, sortingType); - (*range_options_)[col_idx - num_hash_cols].push_back(pv); + (*options_)[key_idx].push_back(std::move(pv)); } } } else if (lhs.has_tuple()) { - // The lhs of this tuple IN condition might have a mix of hash and range columns. 
- // The hash columns in the lhs are always expected to appear to the left of all the - // range columns. We only take care to add the range components of the lhs to - // range_options_groups_ and range_options_indexes_. We compute start_range_col_idx to - // denote where in the lhs tuple range columns start appearing. size_t total_cols = lhs.tuple().elems_size(); DCHECK_GT(total_cols, 0); + // Whenever you have a tuple as a part of IN array, the query is of two types: + // 1. Range tuples SELECT * FROM table where (r1, r2) IN ((1, 1), (2, 2), (3,3)); + // 2. Hash tuples + // a. SELECT * FROM table where (h1, h2) IN ((1, 1), (2, 2), (3,3)); + // b. SELECT * FROM table where h1 IN (1, 2, 3, 4) AND h2 IN (5, 6, 7, 8); + // 3. Hash and range mix. + // The hash columns in the lhs are always expected to appear to the left of all the + // range columns. We only take care to add the range components of the lhs to + // options_groups_ and options_col_ids_. + // In each of these situations, the following steps have to be undertaken + // + // Step 1: Get the column ids of the elements + // For range tuples its (r1, r2). + // For hash tuples its (yb_hash_code, h1, h2), (yb_hash_code, h3, h4) + // Push them into the options groups and options indexes as hybrid scan utilizes to match + // target elements with their corresponding columns. int start_range_col_idx = 0; - std::vector col_idxs; - col_idxs.reserve(lhs.tuple().elems_size()); - - range_options_groups_.BeginNewGroup(); + ColumnListVector col_idxs; + options_groups_.BeginNewGroup(); for (const auto& elem : lhs.tuple().elems()) { DCHECK(elem.has_column_id()); ColumnId col_id = ColumnId(elem.column_id()); - int col_idx = schema_.find_column_by_id(col_id); + auto col_idx = elem.column_id() == kYbHashCodeColId ? 
kYbHashCodeColId + : schema().find_column_by_id(col_id); col_idxs.push_back(col_idx); - if (!schema_.is_range_column(col_idx)) { + if (!schema().is_range_column(col_idx)) { start_range_col_idx++; - DCHECK_EQ(start_range_col_idx, col_idxs.size()); - continue; } - range_options_groups_.AddToLatestGroup(col_idx - num_hash_cols); - range_options_indexes_.emplace_back(col_id); - } - - if (start_range_col_idx >= lhs.tuple().elems_size()) { - return; + options_col_ids_.emplace_back(col_id); + // yb_hash_code takes the 0th group. If there exists a yb_hash_code column, then we offset + // other columns by one position from what schema().find_column_by_id(col_id) provides us + options_groups_.AddToLatestGroup(schema().get_dockey_component_idx(col_idx)); } if (condition.op() == QL_OP_EQUAL) { @@ -237,56 +269,70 @@ void DocPgsqlScanSpec::InitRangeOptions(const PgsqlConditionPB& condition) { const auto& value = rhs.value().list_value(); DCHECK_EQ(total_cols, value.elems_size()); for (size_t i = start_range_col_idx; i < total_cols; i++) { - SortingType sorting_type = schema_.column(col_idxs[i]).sorting_type(); - Option option = - KeyEntryValue::FromQLValuePBForKey( - value.elems(static_cast(i)), sorting_type); - (*range_options_)[col_idxs[i] - num_hash_cols].push_back(std::move(option)); + // hash codes are always sorted ascending. 
+ auto option = KeyEntryValue::FromQLValuePBForKey(value.elems(static_cast(i)), + get_sorting_type(col_idxs[i])); + auto options_idx = schema().get_dockey_component_idx(col_idxs[i]); + (*options_)[options_idx].push_back(std::move(option)); } } else if (condition.op() == QL_OP_IN) { // There should be no range columns before start_range_col_idx in col_idxs // and there should be no hash columns after start_range_col_idx DCHECK(std::find_if(col_idxs.begin(), col_idxs.begin() + start_range_col_idx, - [this] (int idx) { return schema_.is_range_column(idx); }) + [this] (int idx) { return schema().is_range_column(idx); }) == (col_idxs.begin() + start_range_col_idx)); DCHECK(std::find_if(col_idxs.begin() + start_range_col_idx, col_idxs.end(), - [this] (int idx) { return schema_.is_hash_key_column(idx); }) + [this] (int idx) { return schema().is_hash_key_column(idx); }) == (col_idxs.end())); + // Obtain the list of tuples that contain the target values. DCHECK(rhs.value().has_list_value()); const auto& options = rhs.value().list_value(); - int num_options = options.elems_size(); - // IN arguments should have been de-duplicated and ordered ascendingly by the - // executor. + // IN arguments should have been de-duplicated and ordered ascendingly by the executor. + // For range columns, yb_scan sorts them according to the range key values at the pggate + // layer itself. For hash key columns, we need to sort the options based on the + // yb_hash_code value. This enables the docDB iterator to pursue a one pass scan on the + // list of hash key columns + // + // Step 2: Obtain the sorting order for elements. For hash key columns its always + // SortingType::kAscending based on the yb_hash_code, and then the individual hash key + // components subsequently. For range key columns, we try to obtain it from the column + // structure. 
std::vector reverse; reverse.reserve(total_cols); for (size_t i = 0; i < total_cols; i++) { - SortingType sorting_type = schema_.column(col_idxs[i]).sorting_type(); - bool is_reverse_order = - is_forward_scan_ ^ - (sorting_type == SortingType::kAscending || - sorting_type == SortingType::kAscendingNullsLast); - reverse.push_back(is_reverse_order); + reverse.push_back(get_scan_direction(col_idxs[i])); } const auto sorted_options = - GetTuplesSortedByOrdering(options, schema_, is_forward_scan_, col_idxs); + GetTuplesSortedByOrdering(options, schema(), is_forward_scan(), col_idxs); + // Step 3: Add the sorted options into the options_ vector for HybridScan to use them to + // perform seeks and nexts. + // options_ array indexes into every key column. Here we append to every key column the + // list of target elements that needs to be scanned. + int num_options = options.elems_size(); for (int i = 0; i < num_options; i++) { const auto& elem = sorted_options[i]; + DCHECK(elem->has_tuple_value()); const auto& value = elem->tuple_value(); - - for (size_t j = start_range_col_idx; j < total_cols; j++) { - const auto sorting_type = schema_.column(col_idxs[j]).sorting_type(); - auto option = KeyEntryValue::FromQLValuePBForKey( - value.elems(static_cast(j)), sorting_type); - (*range_options_)[col_idxs[j] - num_hash_cols].push_back(std::move(option)); + DCHECK_EQ(total_cols, value.elems_size()); + + for (size_t j = 0; j < total_cols; j++) { + const auto sorting_type = get_sorting_type(col_idxs[j]); + + // For hash tuples, the first element always contains the yb_hash_code + auto option = (j == 0 && col_idxs[j] == kYbHashCodeColId) + ? 
KeyEntryValue::UInt16Hash(value.elems(static_cast(j)).int32_value()) + : KeyEntryValue::FromQLValuePBForKey(value.elems(static_cast(j)), + sorting_type); + auto options_idx = schema().get_dockey_component_idx(col_idxs[j]); + (*options_)[options_idx].push_back(std::move(option)); } } } } - break; } @@ -300,19 +346,41 @@ KeyBytes DocPgsqlScanSpec::bound_key(const Schema& schema, const bool lower_boun KeyBytes result; auto encoder = DocKeyEncoder(&result).Schema(schema); - bool has_hash_columns = schema_.num_hash_key_columns() > 0; - bool hash_components_unset = has_hash_columns && hashed_components_->empty(); + bool has_hash_columns = schema.num_hash_key_columns() > 0; + std::vector hashed_components; + hashed_components.reserve(schema.num_hash_key_columns()); + + int32_t hash_code; + int32_t max_hash_code; + if (hashed_components_->empty() && has_hash_columns && options_ && !options_->empty()) { + for (size_t i = 0; i < schema.num_hash_key_columns(); ++i) { + DCHECK_GE(options_->size(), + schema.num_hash_key_columns() + schema.has_yb_hash_code()); + KeyEntryValue keyval = (*options_)[schema.get_dockey_component_idx(i)].front(); + hashed_components.push_back(keyval); + } + hash_code = static_cast(options_.get()[0][0][0].GetUInt16Hash()); + max_hash_code = (*options_)[0].size() == 1 ? 
hash_code + : max_hash_code_.get_value_or(std::numeric_limits::max()); + } else { + hash_code = hash_code_.get_value_or(std::numeric_limits::min()); + max_hash_code = max_hash_code_.get_value_or(std::numeric_limits::max()); + hashed_components = *hashed_components_; + } + + bool hash_components_unset = has_hash_columns && + (hashed_components.empty() && hashed_components_->empty()); if (hash_components_unset) { // use lower bound hash code if set in request (for scans using token) - if (lower_bound && hash_code_) { - encoder.HashAndRange(*hash_code_, + if (lower_bound && hash_code) { + encoder.HashAndRange(hash_code, {KeyEntryValue(KeyEntryType::kLowest)}, {KeyEntryValue(KeyEntryType::kLowest)}); } // use upper bound hash code if set in request (for scans using token) if (!lower_bound) { if (max_hash_code_) { - encoder.HashAndRange(*max_hash_code_, + encoder.HashAndRange(max_hash_code, {KeyEntryValue(KeyEntryType::kHighest)}, {KeyEntryValue(KeyEntryType::kHighest)}); } else { @@ -323,14 +391,11 @@ KeyBytes DocPgsqlScanSpec::bound_key(const Schema& schema, const bool lower_boun } if (has_hash_columns) { - uint16_t hash = lower_bound - ? hash_code_.get_value_or(std::numeric_limits::min()) - : max_hash_code_.get_value_or(std::numeric_limits::max()); - - encoder.HashAndRange(hash, *hashed_components_, range_components(lower_bound)); + uint16_t hash = lower_bound ? hash_code : max_hash_code; + encoder.HashAndRange(hash, hashed_components, range_components(lower_bound)); } else { // If no hash columns use default hash code (0). 
- encoder.Hash(false, 0, *hashed_components_).Range(range_components(lower_bound)); + encoder.Hash(false, 0, hashed_components).Range(range_components(lower_bound)); } return result; } @@ -338,7 +403,7 @@ KeyBytes DocPgsqlScanSpec::bound_key(const Schema& schema, const bool lower_boun std::vector DocPgsqlScanSpec::range_components(const bool lower_bound, std::vector *inclusivities, bool use_strictness) const { - return GetRangeKeyScanSpec(schema_, + return GetRangeKeyScanSpec(schema(), range_components_, range_bounds_.get(), inclusivities, @@ -361,7 +426,7 @@ Result DocPgsqlScanSpec::Bound(const bool lower_bound) const { } // Paging state + forward scan. - if (is_forward_scan_) { + if (is_forward_scan()) { return lower_bound ? start_doc_key_ : upper_doc_key_; } diff --git a/src/yb/docdb/doc_pgsql_scanspec.h b/src/yb/docdb/doc_pgsql_scanspec.h index 9f747aae17b7..79a20e3a3e5d 100644 --- a/src/yb/docdb/doc_pgsql_scanspec.h +++ b/src/yb/docdb/doc_pgsql_scanspec.h @@ -66,10 +66,6 @@ class DocPgsqlScanSpec : public PgsqlScanSpec { return query_id_; } - bool is_forward_scan() const { - return is_forward_scan_; - } - const size_t prefix_length() const { return prefix_length_; } @@ -91,18 +87,18 @@ class DocPgsqlScanSpec : public PgsqlScanSpec { return range_bounds_.get(); } - const std::shared_ptr>& range_options() const { return range_options_; } + const std::shared_ptr>& options() const { return options_; } - const std::vector range_options_indexes() const { - return range_options_indexes_; + const std::vector& options_indexes() const { + return options_col_ids_; } - const std::vector range_bounds_indexes() const { + const std::vector& range_bounds_indexes() const { return range_bounds_indexes_; } - const ColGroupHolder range_options_groups() const { - return range_options_groups_; + const ColGroupHolder& options_groups() const { + return options_groups_; } private: @@ -120,21 +116,17 @@ class DocPgsqlScanSpec : public PgsqlScanSpec { // Ids of columns that have range 
bounds such as c2 < 4 AND c2 >= 1. std::vector range_bounds_indexes_; - // Initialize range_options_ if hashed_components_ in set and all range columns have one or more - // options (i.e. using EQ/IN conditions). Otherwise range_options_ will stay null and we will - // only use the range_bounds for scanning. - void InitRangeOptions(const PgsqlConditionPB& condition); + // Initialize options_ if range columns have one or more options (i.e. using EQ/IN + // conditions). Otherwise options_ will stay null and we will only use the range_bounds for + // scanning. + void InitOptions(const PgsqlConditionPB& condition); - // The range value options if set. (possibly more than one due to IN conditions). - std::shared_ptr> range_options_; + // The range/hash value options if set (possibly more than one due to IN conditions). + std::shared_ptr> options_; - // Ids of columns that have range option filters such as c2 IN (1, 5, 6, 9). - std::vector range_options_indexes_; + // Ids of key columns that have filters such as h1 IN (1, 5, 6, 9) or r1 IN (5, 6, 7) + std::vector options_col_ids_; - // Schema of the columns to scan. - const Schema& schema_; - - // Query ID of this scan. const rocksdb::QueryId query_id_; // The hashed_components are owned by the caller of QLScanSpec. @@ -142,12 +134,12 @@ class DocPgsqlScanSpec : public PgsqlScanSpec { // The range_components are owned by the caller of QLScanSpec. const std::vector *range_components_; - // Groups of range column indexes found from the filters. + // Groups of column indexes found from the filters. // Eg: If we had an incoming filter of the form (r1, r3, r4) IN ((1,2,5), (5,4,3), ...) // AND r2 <= 5 // where (r1,r2,r3,r4) is the primary key of this table, then - // range_options_groups_ would contain the groups {0,2,3} and {1}. - ColGroupHolder range_options_groups_; + // options_groups_ would contain the groups {0,2,3} and {1}. + ColGroupHolder options_groups_; // Hash code is used if hashed_components_ vector is empty. 
// hash values are positive int16_t. @@ -164,9 +156,6 @@ class DocPgsqlScanSpec : public PgsqlScanSpec { KeyBytes lower_doc_key_; KeyBytes upper_doc_key_; - // Scan behavior. - bool is_forward_scan_; - size_t prefix_length_ = 0; DISALLOW_COPY_AND_ASSIGN(DocPgsqlScanSpec); diff --git a/src/yb/docdb/doc_ql_scanspec.cc b/src/yb/docdb/doc_ql_scanspec.cc index 9fd1edd998e9..744b22dd22d8 100644 --- a/src/yb/docdb/doc_ql_scanspec.cc +++ b/src/yb/docdb/doc_ql_scanspec.cc @@ -33,10 +33,12 @@ namespace docdb { namespace { -bool AreColumnsContinous(std::vector col_idxs) { +bool AreColumnsContinous(ColumnListVector col_idxs) { std::sort(col_idxs.begin(), col_idxs.end()); for (size_t i = 0; i < col_idxs.size() - 1; ++i) { - if (col_idxs[i] + 1 != col_idxs[i + 1]) { + if (col_idxs[i] == kYbHashCodeColId) { + continue; + } else if (col_idxs[i] + 1 != col_idxs[i + 1]) { return false; } } @@ -50,11 +52,10 @@ DocQLScanSpec::DocQLScanSpec(const Schema& schema, const rocksdb::QueryId query_id, const bool is_forward_scan, const size_t prefix_length) - : QLScanSpec(nullptr, nullptr, is_forward_scan, std::make_shared()), + : QLScanSpec(schema, nullptr, nullptr, is_forward_scan, std::make_shared()), range_bounds_(nullptr), - schema_(schema), hashed_components_(nullptr), - range_options_groups_(0), + options_groups_(0), include_static_columns_(false), doc_key_(doc_key.Encode()), query_id_(query_id), @@ -73,13 +74,16 @@ DocQLScanSpec::DocQLScanSpec( const bool include_static_columns, const DocKey& start_doc_key, const size_t prefix_length) - : QLScanSpec(condition, if_condition, is_forward_scan, std::make_shared()), + : QLScanSpec(schema, + condition, + if_condition, + is_forward_scan, + std::make_shared()), range_bounds_(condition ? 
new QLScanRange(schema, *condition) : nullptr), - schema_(schema), hash_code_(hash_code), max_hash_code_(max_hash_code), hashed_components_(&hashed_components.get()), - range_options_groups_(schema_.num_range_key_columns()), + options_groups_(schema.num_dockey_components()), include_static_columns_(include_static_columns), start_doc_key_(start_doc_key.empty() ? KeyBytes() : start_doc_key.Encode()), lower_doc_key_(bound_key(true)), @@ -91,23 +95,42 @@ DocQLScanSpec::DocQLScanSpec( range_bounds_indexes_ = range_bounds_->GetColIds(); } + if (!hashed_components_->empty() && schema.num_hash_key_columns()) { + options_ = std::make_shared>(schema.num_dockey_components()); + // should come here if we are not batching hash keys as a part of IN condition + options_groups_.BeginNewGroup(); + options_groups_.AddToLatestGroup(0); + options_col_ids_.emplace_back(ColumnId(kYbHashCodeColId)); + (*options_)[0].push_back(KeyEntryValue::UInt16Hash(hash_code_.value())); + DCHECK(hashed_components_->size() == schema.num_hash_key_columns()); + + for (size_t col_idx = 0; col_idx < schema.num_hash_key_columns(); ++col_idx) { + options_groups_.AddToLatestGroup(schema.get_dockey_component_idx(col_idx)); + options_col_ids_.emplace_back(schema.column_id(col_idx)); + + (*options_)[schema.get_dockey_component_idx(col_idx)] + .push_back(std::move((*hashed_components_)[col_idx])); + } + } + // If the hash key is fixed and we have range columns with IN condition, try to construct the // exact list of range options to scan for. 
- if (!hashed_components_->empty() && schema_.num_range_key_columns() > 0 && range_bounds_ && + if (!hashed_components_->empty() && schema.num_range_key_columns() > 0 && range_bounds_ && range_bounds_->has_in_range_options()) { DCHECK(condition); - range_options_ = std::make_shared>(schema_.num_range_key_columns()); - InitRangeOptions(*condition); + if (!options_) { + options_ = std::make_shared>(schema.num_dockey_components()); + } + InitOptions(*condition); } } -void DocQLScanSpec::InitRangeOptions(const QLConditionPB& condition) { - size_t num_hash_cols = schema_.num_hash_key_columns(); +void DocQLScanSpec::InitOptions(const QLConditionPB& condition) { switch (condition.op()) { case QLOperator::QL_OP_AND: for (const auto& operand : condition.operands()) { DCHECK(operand.has_condition()); - InitRangeOptions(operand.condition()); + InitOptions(operand.condition()); } break; @@ -115,6 +138,8 @@ void DocQLScanSpec::InitRangeOptions(const QLConditionPB& condition) { case QLOperator::QL_OP_IN: { DCHECK_EQ(condition.operands_size(), 2); // Skip any condition where LHS is not a column (e.g. subscript columns: 'map[k] = v') + // operands(0) always contains the column id. + // operands(1) contains the corresponding value or a list values. const auto& lhs = condition.operands(0); const auto& rhs = condition.operands(1); if (lhs.expr_case() != QLExpressionPB::kColumnId && @@ -129,56 +154,58 @@ void DocQLScanSpec::InitRangeOptions(const QLConditionPB& condition) { if (lhs.has_column_id()) { ColumnId col_id = ColumnId(lhs.column_id()); - size_t col_idx = schema_.find_column_by_id(col_id); + size_t col_idx = schema().find_column_by_id(col_id); - // Skip any non-range columns. - if (!schema_.is_range_column(col_idx)) { - return; - } + // Skip any non-range columns. Hashed columns should always be sent as tuples along with + // their yb_hash_code. Hence, for hashed columns lhs should never be a column id. 
+ DCHECK(schema().is_range_column(col_idx)); + + auto sorting_type = get_sorting_type(col_idx); + + // Adding the offset if yb_hash_code is present after schema usages. Schema does not know + // about yb_hash_code_column + auto key_idx = schema().get_dockey_component_idx(col_idx); - range_options_groups_.BeginNewGroup(); - range_options_groups_.AddToLatestGroup(col_idx - num_hash_cols); - SortingType sorting_type = schema_.column(col_idx).sorting_type(); // TODO: confusing - name says indexes but stores ids - range_options_indexes_.emplace_back(col_id); + options_col_ids_.emplace_back(col_id); + options_groups_.BeginNewGroup(); + options_groups_.AddToLatestGroup(key_idx); if (condition.op() == QL_OP_EQUAL) { auto pv = KeyEntryValue::FromQLValuePBForKey(rhs.value(), sorting_type); - (*range_options_)[col_idx - num_hash_cols].push_back(std::move(pv)); + (*options_)[key_idx].push_back(std::move(pv)); } else { // QL_OP_IN DCHECK_EQ(condition.op(), QL_OP_IN); DCHECK(rhs.value().has_list_value()); const auto& options = rhs.value().list_value(); int opt_size = options.elems_size(); - (*range_options_)[col_idx - num_hash_cols].reserve(opt_size); + (*options_)[key_idx].reserve(opt_size); // IN arguments should have been de-duplicated and ordered ascendingly by the executor. - bool is_reverse_order = is_forward_scan_ ^ (sorting_type == SortingType::kAscending); + auto is_reverse_order = get_scan_direction(col_idx); for (int i = 0; i < opt_size; i++) { int elem_idx = is_reverse_order ? 
opt_size - i - 1 : i; const auto& elem = options.elems(elem_idx); auto pv = KeyEntryValue::FromQLValuePBForKey(elem, sorting_type); - (*range_options_)[col_idx - num_hash_cols].push_back(std::move(pv)); + (*options_)[key_idx].push_back(std::move(pv)); } } } else if (lhs.has_tuple()) { - size_t num_cols = lhs.tuple().elems_size(); - DCHECK_GT(num_cols, 0); - std::vector col_ids; - std::vector col_idxs; - col_ids.reserve(num_cols); - col_idxs.reserve(num_cols); - range_options_groups_.BeginNewGroup(); + size_t total_cols = lhs.tuple().elems_size(); + DCHECK_GT(total_cols, 0); + + ColumnListVector col_idxs; + col_idxs.reserve(total_cols); + options_groups_.BeginNewGroup(); for (const auto& elem : lhs.tuple().elems()) { DCHECK(elem.has_column_id()); ColumnId col_id(elem.column_id()); - int col_idx = schema_.find_column_by_id(col_id); - DCHECK(schema_.is_range_column(col_idx)); - col_ids.push_back(col_id); + auto col_idx = elem.column_id() == kYbHashCodeColId ? kYbHashCodeColId + : schema().find_column_by_id(col_id); col_idxs.push_back(col_idx); - range_options_groups_.AddToLatestGroup(col_idx - num_hash_cols); - range_options_indexes_.emplace_back(col_id); + options_col_ids_.emplace_back(col_id); + options_groups_.AddToLatestGroup(schema().get_dockey_component_idx(col_idx)); } DCHECK(AreColumnsContinous(col_idxs)); @@ -186,33 +213,49 @@ void DocQLScanSpec::InitRangeOptions(const QLConditionPB& condition) { if (condition.op() == QL_OP_EQUAL) { DCHECK(rhs.value().has_list_value()); const auto& value = rhs.value().list_value(); - DCHECK_EQ(num_cols, value.elems_size()); - for (size_t i = 0; i < num_cols; i++) { - SortingType sorting_type = schema_.column(col_idxs[i]).sorting_type(); - Option option = + DCHECK_EQ(total_cols, value.elems_size()); + for (size_t i = 0; i < total_cols; i++) { + auto sorting_type = get_sorting_type(col_idxs[i]); + auto option = KeyEntryValue::FromQLValuePBForKey(value.elems(static_cast(i)), sorting_type); - (*range_options_)[col_idxs[i] - 
num_hash_cols].push_back(std::move(option)); + auto options_idx = + schema().get_dockey_component_idx(col_idxs[i]); + (*options_)[options_idx].push_back(std::move(option)); } } else if (condition.op() == QL_OP_IN) { DCHECK(rhs.value().has_list_value()); const auto& options = rhs.value().list_value(); - int num_options = options.elems_size(); // IN arguments should have been de-duplicated and ordered ascendingly by the // executor. + std::vector reverse; + reverse.reserve(total_cols); + + for (size_t i = 0; i < total_cols; i++) { + reverse.push_back(get_scan_direction(col_idxs[i])); + } + const auto sorted_options = - GetTuplesSortedByOrdering(options, schema_, is_forward_scan_, col_idxs); + GetTuplesSortedByOrdering(options, schema(), is_forward_scan(), col_idxs); + int num_options = options.elems_size(); for (int i = 0; i < num_options; i++) { const auto& elem = sorted_options[i]; + DCHECK(elem->has_tuple_value()); const auto& value = elem->tuple_value(); - DCHECK_EQ(num_cols, value.elems_size()); - - for (size_t j = 0; j < num_cols; j++) { - const auto sorting_type = schema_.column(col_idxs[j]).sorting_type(); - auto option = KeyEntryValue::FromQLValuePBForKey( - value.elems(static_cast(j)), sorting_type); - (*range_options_)[col_idxs[j] - num_hash_cols].push_back(std::move(option)); + DCHECK_EQ(total_cols, value.elems_size()); + + for (size_t j = 0; j < total_cols; j++) { + const auto sorting_type = get_sorting_type(col_idxs[j]); + // For hash tuples, the first element always contains the yb_hash_code + DCHECK(col_idxs[j] != kYbHashCodeColId || j == 0); + auto option = (col_idxs[j] == kYbHashCodeColId) + ? 
KeyEntryValue::UInt16Hash(value.elems(static_cast(j)).int32_value()) + : KeyEntryValue::FromQLValuePBForKey(value.elems(static_cast(j)), + sorting_type); + auto options_idx = + schema().get_dockey_component_idx(col_idxs[j]); + (*options_)[options_idx].push_back(std::move(option)); } } } @@ -256,7 +299,7 @@ KeyBytes DocQLScanSpec::bound_key(const bool lower_bound) const { std::vector DocQLScanSpec::range_components(const bool lower_bound, std::vector *inclusivities, bool use_strictness) const { - return GetRangeKeyScanSpec(schema_, + return GetRangeKeyScanSpec(schema(), nullptr /* prefixed_range_components */, range_bounds_.get(), inclusivities, @@ -329,7 +372,7 @@ Result DocQLScanSpec::Bound(const bool lower_bound) const { } // Paging state + forward scan. - if (is_forward_scan_) { + if (is_forward_scan()) { return lower_bound ? start_doc_key_ : upper_doc_key_; } diff --git a/src/yb/docdb/doc_ql_scanspec.h b/src/yb/docdb/doc_ql_scanspec.h index 9e6e92b9610a..e52689df11ba 100644 --- a/src/yb/docdb/doc_ql_scanspec.h +++ b/src/yb/docdb/doc_ql_scanspec.h @@ -70,7 +70,7 @@ class DocQLScanSpec : public QLScanSpec { return query_id_; } - const std::shared_ptr>& range_options() const { return range_options_; } + const std::shared_ptr>& options() const { return options_; } bool include_static_columns() const { return include_static_columns_; @@ -80,18 +80,16 @@ class DocQLScanSpec : public QLScanSpec { return range_bounds_.get(); } - const Schema* schema() const override { return &schema_; } - - const std::vector range_options_indexes() const { - return range_options_indexes_; + const std::vector options_indexes() const { + return options_col_ids_; } const std::vector range_bounds_indexes() const { return range_bounds_indexes_; } - const ColGroupHolder range_options_groups() const { - return range_options_groups_; + const ColGroupHolder options_groups() const { + return options_groups_; } const size_t prefix_length() const { @@ -104,10 +102,10 @@ class DocQLScanSpec : 
public QLScanSpec { // Return inclusive lower/upper range doc key considering the start_doc_key. Result Bound(const bool lower_bound) const; - // Initialize range_options_ if hashed_components_ is set and all range columns have one or more - // options (i.e. using EQ/IN conditions). Otherwise range_options_ will stay null and we will - // only use the range_bounds for scanning. - void InitRangeOptions(const QLConditionPB& condition); + // Initialize options_ if range columns have one or more options (i.e. using EQ/IN + // conditions). Otherwise options_ will stay null and we will only use the range_bounds for + // scanning. + void InitOptions(const QLConditionPB& condition); // Returns the lower/upper doc key based on the range components. KeyBytes bound_key(const bool lower_bound) const; @@ -123,9 +121,6 @@ class DocQLScanSpec : public QLScanSpec { // Ids of columns that have range bounds such as c2 < 4 AND c2 >= 1. std::vector range_bounds_indexes_; - // Schema of the columns to scan. - const Schema& schema_; - // Hash code to scan at (interpreted as lower bound if hashed_components_ are empty) // hash values are positive int16_t. const boost::optional hash_code_; @@ -137,18 +132,18 @@ class DocQLScanSpec : public QLScanSpec { // The hashed_components are owned by the caller of QLScanSpec. const std::vector* hashed_components_; - // The range value options if set. (possibly more than one due to IN conditions). - std::shared_ptr> range_options_; + // The range/hash value options if set (possibly more than one due to IN conditions). + std::shared_ptr> options_; - // Ids of columns that have range option filters such as c2 IN (1, 5, 6, 9). - std::vector range_options_indexes_; + // Ids of key columns that have filters such as h1 IN (1, 5, 6, 9) or r1 IN (5, 6, 7) + std::vector options_col_ids_; - // Groups of range column indexes found from the filters. + // Groups of column indexes found from the filters. 
// Eg: If we had an incoming filter of the form (r1, r3, r4) IN ((1,2,5), (5,4,3), ...) // AND r2 <= 5 // where (r1,r2,r3,r4) is the primary key of this table, then - // range_options_groups_ would contain the groups {0,2,3} and {1}. - ColGroupHolder range_options_groups_; + // options_groups_ would contain the groups {0,2,3} and {1}. + ColGroupHolder options_groups_; // Does the scan include static columns also? const bool include_static_columns_; diff --git a/src/yb/docdb/key_entry_value.h b/src/yb/docdb/key_entry_value.h index d947e06e09c0..7b1089df64da 100644 --- a/src/yb/docdb/key_entry_value.h +++ b/src/yb/docdb/key_entry_value.h @@ -27,6 +27,7 @@ #include "yb/docdb/docdb_encoding_fwd.h" +#include "yb/gutil/integral_types.h" #include "yb/util/algorithm_util.h" #include "yb/util/kv_util.h" #include "yb/util/net/inetaddress.h" @@ -96,6 +97,7 @@ class KeyEntryValue { bool IsString() const; bool IsInt32() const; + bool IsUInt16Hash() const; bool IsInt64() const; bool IsFloat() const; bool IsDouble() const; @@ -110,6 +112,7 @@ class KeyEntryValue { bool IsTimestamp() const; const std::string& GetString() const; + uint16_t GetUInt16Hash() const; int32_t GetInt32() const; int64_t GetInt64() const; float GetFloat() const; diff --git a/src/yb/docdb/primitive_value.cc b/src/yb/docdb/primitive_value.cc index ddb33ea019d4..6d47733c9484 100644 --- a/src/yb/docdb/primitive_value.cc +++ b/src/yb/docdb/primitive_value.cc @@ -26,9 +26,11 @@ #include "yb/docdb/doc_key.h" #include "yb/docdb/doc_kv_util.h" #include "yb/docdb/intent.h" +#include "yb/docdb/key_entry_value.h" #include "yb/docdb/value_type.h" #include "yb/gutil/casts.h" +#include "yb/gutil/integral_types.h" #include "yb/gutil/macros.h" #include "yb/gutil/stringprintf.h" #include "yb/gutil/strings/substitute.h" @@ -2879,6 +2881,15 @@ bool KeyEntryValue::IsInt32() const { return KeyEntryType::kInt32 == type_ || KeyEntryType::kInt32Descending == type_; } +bool KeyEntryValue::IsUInt16Hash() const { + return type_ == 
KeyEntryType::kUInt16Hash; +} + +uint16_t KeyEntryValue::GetUInt16Hash() const { + DCHECK(IsUInt16Hash()); + return uint16_val_; +} + int32_t KeyEntryValue::GetInt32() const { DCHECK(IsInt32()); return int32_val_; diff --git a/src/yb/docdb/ql_rocksdb_storage.cc b/src/yb/docdb/ql_rocksdb_storage.cc index 89849495525b..d4d8a5e60b7d 100644 --- a/src/yb/docdb/ql_rocksdb_storage.cc +++ b/src/yb/docdb/ql_rocksdb_storage.cc @@ -185,7 +185,8 @@ Status QLRocksDBStorage::GetIterator( projection, doc_read_context, txn_op_context, doc_db_, deadline, read_time, /*pending_op_counter=*/nullptr, end_referenced_key_column_index); - if (range_components.size() == schema.num_range_key_columns()) { + if (range_components.size() == schema.num_range_key_columns() && + hashed_components.size() == schema.num_hash_key_columns()) { // Construct the scan spec basing on the RANGE condition as all range columns are specified. RETURN_NOT_OK(doc_iter->Init(DocPgsqlScanSpec( schema, diff --git a/src/yb/docdb/scan_choices-test.cc b/src/yb/docdb/scan_choices-test.cc index f2146bed805f..66829913a0f2 100644 --- a/src/yb/docdb/scan_choices-test.cc +++ b/src/yb/docdb/scan_choices-test.cc @@ -231,7 +231,7 @@ void ScanChoicesTest::AdjustForRangeConstraints() { EXPECT_FALSE(choices_->FinishedWithScanChoices()); const auto &cur_target = choices_->current_scan_target_; DocKeyDecoder decoder(cur_target); - EXPECT_OK(decoder.DecodeToRangeGroup()); + EXPECT_OK(decoder.DecodeToKeys()); KeyEntryValue cur_val; // The size of the dockey we have found so far that does not need adjustment size_t valid_size = 0; diff --git a/src/yb/docdb/scan_choices.cc b/src/yb/docdb/scan_choices.cc index 38c810cbacbb..dd77235f0d34 100644 --- a/src/yb/docdb/scan_choices.cc +++ b/src/yb/docdb/scan_choices.cc @@ -37,7 +37,8 @@ bool ScanChoices::CurrentTargetMatchesKey(const Slice& curr) { curr.starts_with(current_scan_target_) ? 
"YEP" : "NOPE") << ": " << DocKey::DebugSliceToString(curr) << " vs " << DocKey::DebugSliceToString(current_scan_target_.AsSlice()); - return !current_scan_target_.empty() && curr.starts_with(current_scan_target_); + return is_trivial_filter_ || + (!current_scan_target_.empty() && curr.starts_with(current_scan_target_)); } HybridScanChoices::HybridScanChoices( @@ -45,8 +46,8 @@ HybridScanChoices::HybridScanChoices( const KeyBytes& lower_doc_key, const KeyBytes& upper_doc_key, bool is_forward_scan, - const std::vector& range_options_col_ids, - const std::shared_ptr>& range_options, + const std::vector& options_col_ids, + const std::shared_ptr>& options, const std::vector& range_bounds_col_ids, const QLScanRange* range_bounds, const ColGroupHolder& col_groups, @@ -54,18 +55,18 @@ HybridScanChoices::HybridScanChoices( : ScanChoices(is_forward_scan), lower_doc_key_(lower_doc_key), upper_doc_key_(upper_doc_key), col_groups_(col_groups), prefix_length_(prefix_length) { - size_t num_hash_cols = schema.num_hash_key_columns(); - // Number of dockey columns with specified filters. [kLowest, kHighest] does not - // count as a specified filter. 
- size_t last_filtered_idx = num_hash_cols - 1; + size_t last_filtered_idx = static_cast(-1); + has_hash_columns_ = schema.has_yb_hash_code(); + num_hash_cols_ = schema.num_hash_key_columns(); + + for (size_t idx = 0; idx < schema.num_dockey_components(); ++idx) { + const auto col_id = GetColumnId(schema, idx); - for (size_t idx = num_hash_cols; idx < schema.num_key_columns(); idx++) { - const ColumnId col_id = schema.column_id(idx); std::vector current_options; bool col_has_range_option = - std::find(range_options_col_ids.begin(), range_options_col_ids.end(), col_id) != - range_options_col_ids.end(); + std::find(options_col_ids.begin(), options_col_ids.end(), col_id) != + options_col_ids.end(); bool col_has_range_bound = std::find(range_bounds_col_ids.begin(), range_bounds_col_ids.end(), col_id) != @@ -73,7 +74,9 @@ HybridScanChoices::HybridScanChoices( // If this is a range bound filter, we create a singular // list of the given range bound if (col_has_range_bound && !col_has_range_option) { - const auto col_sort_type = schema.column(idx).sorting_type(); + const auto col_sort_type = + col_id.rep() == kYbHashCodeColId ? 
SortingType::kAscending + : schema.column(schema.find_column_by_id( col_id)).sorting_type(); const QLScanRange::QLRange range = range_bounds->RangeFor(col_id); const auto lower = GetQLRangeBoundAsPVal(range, col_sort_type, true /* lower_bound */); const auto upper = GetQLRangeBoundAsPVal(range, col_sort_type, false /* upper_bound */); @@ -86,13 +89,14 @@ HybridScanChoices::HybridScanChoices( current_options.size() + 1); col_groups_.BeginNewGroup(); - col_groups_.AddToLatestGroup(idx - num_hash_cols); - if (!upper.IsInfinity() || !lower.IsInfinity()) + col_groups_.AddToLatestGroup(scan_options_.size()); + if (!upper.IsInfinity() || !lower.IsInfinity()) { last_filtered_idx = idx; + } } else if (col_has_range_option) { - auto& options = (*range_options)[idx - num_hash_cols]; - current_options.reserve(options.size()); - if (options.empty()) { + const auto& temp_options = (*options)[idx]; + current_options.reserve(temp_options.size()); + if (temp_options.empty()) { // If there is nothing specified in the IN list like in // SELECT * FROM ... WHERE c1 IN (); // then nothing should pass the filter. @@ -108,24 +112,25 @@ HybridScanChoices::HybridScanChoices( current_options.size(), current_options.size() + 1); } else { - auto last_option = *options.begin(); + auto last_option = *temp_options.begin(); size_t begin = 0; size_t current_ind = 0; - auto opt_list_idx = idx - num_hash_cols; + auto opt_list_idx = idx; auto group = col_groups_.GetGroup(opt_list_idx); + DCHECK(std::is_sorted(group.begin(), group.end())); // We carry out run compression on all the options as described in the // comment for the OptionRange class. 
- bool is_front = group.front() == (opt_list_idx); + bool is_front = (group.front() == opt_list_idx); std::vector::iterator prev_options_list_it; if (!is_front) { auto it = std::find(group.begin(), group.end(), opt_list_idx); --it; - prev_options_list_it = range_cols_scan_options_[*it].begin(); + prev_options_list_it = scan_options_[*it].begin(); } - for (auto option : options) { + for (const auto& option : temp_options) { // If we're moving to a new option value or we are crossing boundaries // across options for the previous options list then we push a new // option for this list. @@ -148,7 +153,7 @@ HybridScanChoices::HybridScanChoices( // If no filter is specified, we just impose an artificial range // filter [kLowest, kHighest] col_groups_.BeginNewGroup(); - col_groups_.AddToLatestGroup(range_cols_scan_options_.size()); + col_groups_.AddToLatestGroup(scan_options_.size()); current_options.emplace_back( KeyEntryValue(KeyEntryType::kLowest), true, @@ -157,24 +162,29 @@ HybridScanChoices::HybridScanChoices( current_options.size(), current_options.size() + 1); } - range_cols_scan_options_.push_back(current_options); + scan_options_.push_back(current_options); } - size_t filter_length = std::max(last_filtered_idx - num_hash_cols + 1, prefix_length); - DCHECK_LE(filter_length, range_cols_scan_options_.size()); + // We add 1 to a valid prefix_length_ if there are hash columns + // to account for the hash code column + prefix_length_ += (prefix_length_ && has_hash_columns_); + + size_t filter_length = std::max(last_filtered_idx + 1, prefix_length_); + DCHECK_LE(filter_length, scan_options_.size()); - range_cols_scan_options_.resize(filter_length); + scan_options_.resize(filter_length); + is_trivial_filter_ = scan_options_.empty(); - current_scan_target_ranges_.resize(range_cols_scan_options_.size()); + current_scan_target_ranges_.resize(scan_options_.size()); current_scan_target_.Clear(); // Initialize current_scan_target_ranges_ - for (size_t i = 0; i < 
range_cols_scan_options_.size(); i++) { - current_scan_target_ranges_[i] = range_cols_scan_options_.at(i).begin(); + for (size_t i = 0; i < scan_options_.size(); i++) { + current_scan_target_ranges_[i] = scan_options_.at(i).begin(); } - schema_num_keys_ = schema.num_range_key_columns(); + schema_num_keys_ = schema.num_dockey_components(); } HybridScanChoices::HybridScanChoices( @@ -185,9 +195,9 @@ const KeyBytes& upper_doc_key, const size_t prefix_length) : HybridScanChoices( schema, lower_doc_key, upper_doc_key, doc_spec.is_forward_scan(), - doc_spec.range_options_indexes(), doc_spec.range_options(), + doc_spec.options_indexes(), doc_spec.options(), doc_spec.range_bounds_indexes(), doc_spec.range_bounds(), - doc_spec.range_options_groups(), prefix_length) {} + doc_spec.options_groups(), prefix_length) {} HybridScanChoices::HybridScanChoices( const Schema& schema, @@ -197,37 +207,52 @@ HybridScanChoices::HybridScanChoices( const size_t prefix_length) : HybridScanChoices( schema, lower_doc_key, upper_doc_key, doc_spec.is_forward_scan(), - doc_spec.range_options_indexes(), doc_spec.range_options(), + doc_spec.options_indexes(), doc_spec.options(), doc_spec.range_bounds_indexes(), doc_spec.range_bounds(), - doc_spec.range_options_groups(), prefix_length) {} + doc_spec.options_groups(), prefix_length) {} std::vector::const_iterator -HybridScanChoices::GetOptAtIndex(size_t opt_list_idx, size_t opt_index) { +HybridScanChoices::GetOptAtIndex(size_t opt_list_idx, size_t opt_index) const { if (col_groups_.GetGroup(opt_list_idx).back() == opt_list_idx) { // There shouldn't be any run-compression for elements at the back of a group. 
- return range_cols_scan_options_[opt_list_idx].begin() + opt_index; + return scan_options_[opt_list_idx].begin() + opt_index; } auto current = current_scan_target_ranges_[opt_list_idx]; - if (current != range_cols_scan_options_[opt_list_idx].end() && + if (current != scan_options_[opt_list_idx].end() && current->HasIndex(opt_index)) { return current; } // Find which options begin_idx, end_idx range contains opt_index. OptionRange target_value_range({}, true, {}, true, opt_index, opt_index); - auto option_it = std::lower_bound(range_cols_scan_options_[opt_list_idx].begin(), - range_cols_scan_options_[opt_list_idx].end(), + auto option_it = std::lower_bound(scan_options_[opt_list_idx].begin(), + scan_options_[opt_list_idx].end(), target_value_range, OptionRange::end_idx_leq); return option_it; } +ColumnId HybridScanChoices::GetColumnId(const Schema& schema, size_t idx) const { + return idx == 0 && has_hash_columns_ ? ColumnId(kYbHashCodeColId) + : schema.column_id(idx - has_hash_columns_); +} + +Status HybridScanChoices::DecodeKey(DocKeyDecoder* decoder, KeyEntryValue* target_value) const { + RETURN_NOT_OK(decoder->DecodeKeyEntryValue(target_value)); + + // We make sure to consume the kGroupEnd character if any. 
+ if (!decoder->left_input().empty()) { + VERIFY_RESULT(decoder->HasPrimitiveValue(AllowSpecial::kTrue)); + } + return Status::OK(); +} + std::vector::const_iterator -HybridScanChoices::GetSearchSpaceLowerBound(size_t opt_list_idx) { +HybridScanChoices::GetSearchSpaceLowerBound(size_t opt_list_idx) const { auto group = col_groups_.GetGroup(opt_list_idx); if (group.front() == opt_list_idx) - return range_cols_scan_options_[opt_list_idx].begin(); + return scan_options_[opt_list_idx].begin(); auto it = std::find(group.begin(), group.end(), opt_list_idx); DCHECK(it != group.end()); @@ -239,10 +264,10 @@ HybridScanChoices::GetSearchSpaceLowerBound(size_t opt_list_idx) { } std::vector::const_iterator -HybridScanChoices::GetSearchSpaceUpperBound(size_t opt_list_idx) { +HybridScanChoices::GetSearchSpaceUpperBound(size_t opt_list_idx) const { auto group = col_groups_.GetGroup(opt_list_idx); if (group.front() == opt_list_idx) - return range_cols_scan_options_[opt_list_idx].end(); + return scan_options_[opt_list_idx].end(); auto it = std::find(group.begin(), group.end(), opt_list_idx); DCHECK(it != group.end()); @@ -256,8 +281,8 @@ HybridScanChoices::GetSearchSpaceUpperBound(size_t opt_list_idx) { void HybridScanChoices::SetOptToIndex(size_t opt_list_idx, size_t opt_index) { current_scan_target_ranges_[opt_list_idx] = GetOptAtIndex(opt_list_idx, opt_index); DCHECK_LT(current_scan_target_ranges_[opt_list_idx] - - range_cols_scan_options_[opt_list_idx].begin(), - range_cols_scan_options_[opt_list_idx].size()); + - scan_options_[opt_list_idx].begin(), + scan_options_[opt_list_idx].size()); } void HybridScanChoices::SetGroup(size_t opt_list_idx, size_t opt_index) { @@ -339,18 +364,18 @@ Result HybridScanChoices::SkipTargetsUpTo(const Slice& new_target) { [ If c_next is = u_c_k and l_c_(k+1) is a strict bound. 
] */ DocKeyDecoder decoder(new_target); - RETURN_NOT_OK(decoder.DecodeToRangeGroup()); + RETURN_NOT_OK(decoder.DecodeToKeys()); current_scan_target_.Reset(Slice(new_target.data(), decoder.left_input().data())); size_t option_list_idx = 0; for (option_list_idx = 0; option_list_idx < current_scan_target_ranges_.size(); option_list_idx++) { - const auto& options = range_cols_scan_options_[option_list_idx]; + const auto& options = scan_options_[option_list_idx]; auto current_it = current_scan_target_ranges_[option_list_idx]; DCHECK(current_it != options.end()); KeyEntryValue target_value; - auto decode_status = decoder.DecodeKeyEntryValue(&target_value); + auto decode_status = DecodeKey(&decoder, &target_value); if (!decode_status.ok()) { VLOG(1) << "Failed to decode the key: " << decode_status; // We return false to give the caller a chance to validate and skip past any keys that scan @@ -383,11 +408,11 @@ Result HybridScanChoices::SkipTargetsUpTo(const Slice& new_target) { // If it's in range then good, continue after appending the target value // column. if (lower_cmp_fn(target_value, *lower) && upper_cmp_fn(target_value, *upper)) { - target_value.AppendToKey(¤t_scan_target_); + AppendToScanTarget(target_value, option_list_idx); continue; } - // If target_value is not in the current range then we must find a range + // If target_value is not in the current option then we must find a option // that works for it. // If we are above all ranges then increment the index of the previous // column. @@ -413,7 +438,7 @@ Result HybridScanChoices::SkipTargetsUpTo(const Slice& new_target) { if (it == end) { // target value is higher than all range options and // we need to increment. 
- RETURN_NOT_OK(IncrementScanTargetAtOptionList(static_cast(option_list_idx) - 1)); + RETURN_NOT_OK(IncrementScanTargetAtOptionList(option_list_idx - 1)); option_list_idx = current_scan_target_ranges_.size(); break; } @@ -440,7 +465,7 @@ Result HybridScanChoices::SkipTargetsUpTo(const Slice& new_target) { const KeyEntryValue& t2) { return t1 < t2; }; if (target_value >= *lower && target_value <= *upper) { - target_value.AppendToKey(¤t_scan_target_); + AppendToScanTarget(target_value, option_list_idx); if (lower_cmp_fn(target_value, *lower) && upper_cmp_fn(target_value, *upper)) { // target_value satisfies the current range condition. // Let's move on. @@ -455,13 +480,13 @@ Result HybridScanChoices::SkipTargetsUpTo(const Slice& new_target) { DCHECK(target_value == *upper || target_value == *lower); if (is_forward_scan_ && target_value == *upper) { - RETURN_NOT_OK(IncrementScanTargetAtOptionList(static_cast(option_list_idx))); + RETURN_NOT_OK(IncrementScanTargetAtOptionList(option_list_idx)); option_list_idx = current_scan_target_ranges_.size(); break; } if (!is_forward_scan_ && target_value == *lower) { - RETURN_NOT_OK(IncrementScanTargetAtOptionList(static_cast(option_list_idx))); + RETURN_NOT_OK(IncrementScanTargetAtOptionList(option_list_idx)); option_list_idx = current_scan_target_ranges_.size(); break; } @@ -469,11 +494,7 @@ Result HybridScanChoices::SkipTargetsUpTo(const Slice& new_target) { // If a strict lower bound is broken then we can simply append // a kHighest (kLowest) to get a target that satisfies the strict // lower bound - if (is_forward_scan_) { - KeyEntryValue(KeyEntryType::kHighest).AppendToKey(¤t_scan_target_); - } else { - KeyEntryValue(KeyEntryType::kLowest).AppendToKey(¤t_scan_target_); - } + AppendInfToScanTarget(is_forward_scan_, option_list_idx); option_list_idx++; break; } @@ -485,15 +506,20 @@ Result HybridScanChoices::SkipTargetsUpTo(const Slice& new_target) { DCHECK((is_forward_scan_ && *lower > target_value) || (!is_forward_scan_ && 
*upper < target_value)); + // Here we append the lower bound + kLowest or upper bound + kHighest. Generally appending + // to scan targets are always followed by a check if it has reached the last hash column. + // This is to add a kGroundEnd after the last hash column. However, here we append them + // directly and check for hash columns in the end. This is because, the possible combinations + // of appending them is complex and hence we append to key on a case by case basis. if (is_forward_scan_) { - lower->AppendToKey(¤t_scan_target_); + AppendToScanTarget(*lower, option_list_idx); if (!lower_incl) { - KeyEntryValue(KeyEntryType::kHighest).AppendToKey(¤t_scan_target_); + AppendInfToScanTarget(true, option_list_idx); } } else { - upper->AppendToKey(¤t_scan_target_); + AppendToScanTarget(*upper, option_list_idx); if (!upper_incl) { - KeyEntryValue(KeyEntryType::kLowest).AppendToKey(¤t_scan_target_); + AppendInfToScanTarget(false, option_list_idx); } } option_list_idx++; @@ -502,7 +528,7 @@ Result HybridScanChoices::SkipTargetsUpTo(const Slice& new_target) { // Reset the remaining range columns to lower bounds for forward scans // or upper bounds for backward scans. 
- for (size_t i = option_list_idx; i < range_cols_scan_options_.size(); i++) { + for (size_t i = option_list_idx; i < scan_options_.size(); i++) { auto begin = GetSearchSpaceLowerBound(i); SetGroup(i, begin->begin_idx()); @@ -510,19 +536,19 @@ Result HybridScanChoices::SkipTargetsUpTo(const Slice& new_target) { auto lower_incl = current_it->lower_inclusive(); auto upper_incl = current_it->upper_inclusive(); - if (is_forward_scan_) { - current_scan_target_ranges_[i]->lower().AppendToKey(¤t_scan_target_); - if (!lower_incl) { - KeyEntryValue(KeyEntryType::kHighest).AppendToKey(¤t_scan_target_); - } - } else { - current_scan_target_ranges_[i]->upper().AppendToKey(¤t_scan_target_); - if (!upper_incl) { - KeyEntryValue(KeyEntryType::kLowest).AppendToKey(¤t_scan_target_); - } - } + const auto& range_bound = is_forward_scan_ ? current_scan_target_ranges_[i]->lower() + : current_scan_target_ranges_[i]->upper(); + + AppendToScanTarget(range_bound, i); + if ((is_forward_scan_ && !lower_incl) || + (!is_forward_scan_ && !upper_incl)) { + AppendInfToScanTarget(is_forward_scan_, i); } + } + DCHECK(VERIFY_RESULT(ValidateHashGroup(current_scan_target_))) + << "current_scan_target_ validation failed: " + << DocKey::DebugSliceToString(current_scan_target_); VLOG(2) << "After " << __PRETTY_FUNCTION__ << " current_scan_target_ is " << DocKey::DebugSliceToString(current_scan_target_); return true; @@ -557,11 +583,11 @@ Result HybridScanChoices::SkipTargetsUpTo(const Slice& new_target) { // This function leaves the scan target as is if the next tuple in the current // scan direction is also the next tuple in the filter space and start_col // is given as the last column -Status HybridScanChoices::IncrementScanTargetAtOptionList(int start_option_list_idx) { +Status HybridScanChoices::IncrementScanTargetAtOptionList(ssize_t start_option_list_idx) { VLOG_WITH_FUNC(2) << "Incrementing at " << start_option_list_idx; // Increment start col, move backwards in case of overflow. 
- int option_list_idx = start_option_list_idx; + ssize_t option_list_idx = start_option_list_idx; // lower and upper here are taken relative to the scan order using extremal_fn_t = std::function; @@ -577,14 +603,14 @@ Status HybridScanChoices::IncrementScanTargetAtOptionList(int start_option_list_ : &OptionRange::lower; DocKeyDecoder t_decoder(current_scan_target_); - RETURN_NOT_OK(t_decoder.DecodeToRangeGroup()); + RETURN_NOT_OK(t_decoder.DecodeToKeys()); // refer to the documentation of this function to see what extremal // means here std::vector is_extremal; for (int i = 0; i <= option_list_idx; ++i) { KeyEntryValue target_value; - RETURN_NOT_OK(t_decoder.DecodeKeyEntryValue(&target_value)); + RETURN_NOT_OK(DecodeKey(&t_decoder, &target_value)); is_extremal.push_back(target_value == upper_extremal_fn(*current_scan_target_ranges_[i])); } @@ -610,7 +636,7 @@ Status HybridScanChoices::IncrementScanTargetAtOptionList(int start_option_list_ size_t idx = is_forward_scan_ ? it->begin_idx() : it->end_idx() - 1; SetGroup(option_list_idx, idx); - DCHECK(it != range_cols_scan_options_[option_list_idx].end()); + DCHECK(it != scan_options_[option_list_idx].end()); // if we are AT the boundary of a strict bound then we // want to append an infinity after this column to satisfy // the strict bound requirement @@ -629,9 +655,9 @@ Status HybridScanChoices::IncrementScanTargetAtOptionList(int start_option_list_ } DocKeyDecoder decoder(current_scan_target_); - RETURN_NOT_OK(decoder.DecodeToRangeGroup()); + RETURN_NOT_OK(decoder.DecodeToKeys()); for (int i = 0; i < option_list_idx; ++i) { - RETURN_NOT_OK(decoder.DecodeKeyEntryValue()); + RETURN_NOT_OK(DecodeKey(&decoder)); } if (option_list_idx < 0) { @@ -645,12 +671,8 @@ Status HybridScanChoices::IncrementScanTargetAtOptionList(int start_option_list_ decoder.left_input().cdata() - current_scan_target_.AsSlice().cdata()); if (start_with_infinity && - (option_list_idx < static_cast(schema_num_keys_))) { - if (is_forward_scan_) { - 
KeyEntryValue(KeyEntryType::kHighest).AppendToKey(¤t_scan_target_); - } else { - KeyEntryValue(KeyEntryType::kLowest).AppendToKey(¤t_scan_target_); - } + (option_list_idx < static_cast(schema_num_keys_))) { + AppendInfToScanTarget(is_forward_scan_, option_list_idx); option_list_idx++; } @@ -662,8 +684,8 @@ Status HybridScanChoices::IncrementScanTargetAtOptionList(int start_option_list_ // Reset all columns that are > col_idx // We don't want to necessarily reset col_idx as it may // have been the case that we got here via an increment on col_idx - int64 current_scan_target_ranges_size = static_cast(current_scan_target_ranges_.size()); - for (int i = option_list_idx; i < current_scan_target_ranges_size; ++i) { + ssize_t current_scan_target_ranges_size = current_scan_target_ranges_.size(); + for (auto i = option_list_idx; i < current_scan_target_ranges_size; ++i) { auto begin = GetSearchSpaceLowerBound(i); auto it_0 = i == option_list_idx ? current_scan_target_ranges_[i] : begin; @@ -671,21 +693,44 @@ Status HybridScanChoices::IncrementScanTargetAtOptionList(int start_option_list_ // with if necessary. SetGroup(i, it_0->begin_idx()); - lower_extremal_fn(*it_0).AppendToKey(¤t_scan_target_); + AppendToScanTarget(lower_extremal_fn(*it_0), i); if (!lower_extremal_incl_fn(*it_0)) { - if (is_forward_scan_) { - KeyEntryValue(KeyEntryType::kHighest).AppendToKey(¤t_scan_target_); - } else { - KeyEntryValue(KeyEntryType::kLowest).AppendToKey(¤t_scan_target_); - } + AppendInfToScanTarget(is_forward_scan_, i); } } VLOG_WITH_FUNC(2) << "Key after increment is " << DocKey::DebugSliceToString(current_scan_target_); + DCHECK(VERIFY_RESULT(ValidateHashGroup(current_scan_target_))) + << "current_scan_target_ validation failed: " + << DocKey::DebugSliceToString(current_scan_target_); return Status::OK(); } +// Validating Scan targets by checking if they have yb_hash_code, hash components and group end in +// order. 
We do not check range components as sometimes they can end without group ends. Sometimes +// seeks happens with kHighest and a groupend for hash split tables. In such a situation decoding +// hash code is not possible and hence, we validate only fully formed keys. +Result HybridScanChoices::ValidateHashGroup(const KeyBytes& scan_target) const { + if (is_options_done_) { + return true; + } + + DocKeyDecoder t_decoder(scan_target); + RETURN_NOT_OK(t_decoder.DecodeCotableId()); + RETURN_NOT_OK(t_decoder.DecodeColocationId()); + if (has_hash_columns_) { + if (!VERIFY_RESULT(t_decoder.DecodeHashCode(AllowSpecial::kTrue))) { + return false; + } + for (size_t i = 0; i < num_hash_cols_; i++) { + RETURN_NOT_OK(t_decoder.DecodeKeyEntryValue()); + } + RETURN_NOT_OK(t_decoder.ConsumeGroupEnd()); + } + return true; +} + std::vector HybridScanChoices::TEST_GetCurrentOptions() { std::vector result; for (auto it : current_scan_target_ranges_) { @@ -696,11 +741,11 @@ std::vector HybridScanChoices::TEST_GetCurrentOptions() { // Method called when the scan target is done being used Status HybridScanChoices::DoneWithCurrentTarget() { - if (schema_num_keys_ == range_cols_scan_options_.size() || + if (schema_num_keys_ == scan_options_.size() || prefix_length_ > 0) { - int incr_idx = - static_cast(prefix_length_ ? prefix_length_ : current_scan_target_ranges_.size()) - 1; + ssize_t incr_idx = + (prefix_length_ ? 
prefix_length_ : current_scan_target_ranges_.size()) - 1; RETURN_NOT_OK(IncrementScanTargetAtOptionList(incr_idx)); current_scan_target_.AppendKeyEntryType(KeyEntryType::kGroupEnd); } @@ -770,7 +815,7 @@ Status HybridScanChoices::SeekToCurrentTarget(IntentAwareIteratorIf* db_iter) { ScanChoicesPtr ScanChoices::Create( const Schema& schema, const DocQLScanSpec& doc_spec, const KeyBytes& lower_doc_key, const KeyBytes& upper_doc_key) { - if (doc_spec.range_options() || doc_spec.range_bounds()) { + if (doc_spec.options() || doc_spec.range_bounds()) { return std::make_unique( schema, doc_spec, lower_doc_key, upper_doc_key, doc_spec.prefix_length()); } @@ -781,7 +826,7 @@ ScanChoicesPtr ScanChoices::Create( ScanChoicesPtr ScanChoices::Create( const Schema& schema, const DocPgsqlScanSpec& doc_spec, const KeyBytes& lower_doc_key, const KeyBytes& upper_doc_key) { - if (doc_spec.range_options() || doc_spec.range_bounds()) { + if (doc_spec.options() || doc_spec.range_bounds()) { return std::make_unique( schema, doc_spec, lower_doc_key, upper_doc_key, doc_spec.prefix_length()); } diff --git a/src/yb/docdb/scan_choices.h b/src/yb/docdb/scan_choices.h index 4ec45fed104d..15737b038224 100644 --- a/src/yb/docdb/scan_choices.h +++ b/src/yb/docdb/scan_choices.h @@ -19,8 +19,10 @@ #include "yb/docdb/doc_ql_scanspec.h" #include "yb/docdb/value.h" #include "yb/docdb/docdb_fwd.h" - #include "yb/docdb/value_type.h" + +#include "yb/gutil/casts.h" + #include "yb/util/slice.h" #include "yb/util/status_fwd.h" @@ -53,8 +55,25 @@ class ScanChoices { // current target. virtual Status SeekToCurrentTarget(IntentAwareIteratorIf* db_iter) = 0; - static Result> DecodeKeyEntryValue( - DocKeyDecoder* decoder, size_t num_cols); + // Append KeyEntryValue to target. After every append, we need to check if it is the last hash key + // column. Subsequently, we need to add a kGroundEnd after that if it is the last hash key cokumn. 
+ // Hence, appending to scan target should always be done using this function. + void AppendToScanTarget(const KeyEntryValue& target, size_t col_idx) { + target.AppendToKey(¤t_scan_target_); + if (has_hash_columns_ && col_idx == num_hash_cols_) { + current_scan_target_.AppendKeyEntryType(KeyEntryType::kGroupEnd); + } + } + + void AppendInfToScanTarget(bool positive, size_t col_idx) { + if (has_hash_columns_ && col_idx == num_hash_cols_) { + current_scan_target_.RemoveLastByte(); + } + + KeyEntryValue inf_val = + positive ? KeyEntryValue(KeyEntryType::kHighest) : KeyEntryValue(KeyEntryType::kLowest); + AppendToScanTarget(inf_val, col_idx); + } static ScanChoicesPtr Create( const Schema& schema, const DocQLScanSpec& doc_spec, const KeyBytes& lower_doc_key, @@ -68,6 +87,12 @@ class ScanChoices { const bool is_forward_scan_; KeyBytes current_scan_target_; bool finished_ = false; + bool has_hash_columns_ = false; + size_t num_hash_cols_; + + // True if CurrentTargetMatchesKey should return true all the time as + // the filter this ScanChoices iterates over is trivial. 
+ bool is_trivial_filter_ = false; private: DISALLOW_COPY_AND_ASSIGN(ScanChoices); @@ -138,22 +163,22 @@ class ScanChoices { class OptionRange { public: OptionRange(KeyEntryValue lower, bool lower_inclusive, KeyEntryValue upper, bool upper_inclusive) - : lower_(lower), - lower_inclusive_(lower_inclusive), - upper_(upper), - upper_inclusive_(upper_inclusive), - begin_idx_(0), - end_idx_(0) {} + : OptionRange(std::move(lower), + lower_inclusive, + std::move(upper), + upper_inclusive, + 0 /* begin_idx_ */, + 0 /* end_idx_ */) {} OptionRange( - KeyEntryValue lower, bool lower_inclusive, KeyEntryValue upper, bool upper_inclusive, - size_t begin_idx, size_t end_idx) - : lower_(lower), - lower_inclusive_(lower_inclusive), - upper_(upper), - upper_inclusive_(upper_inclusive), - begin_idx_(begin_idx), - end_idx_(end_idx) {} + KeyEntryValue lower, bool lower_inclusive, KeyEntryValue upper, bool upper_inclusive, + size_t begin_idx, size_t end_idx) + : lower_(std::move(lower)), + lower_inclusive_(lower_inclusive), + upper_(std::move(upper)), + upper_inclusive_(upper_inclusive), + begin_idx_(begin_idx), + end_idx_(end_idx) {} // Convenience constructors for testing OptionRange(int begin, int end, SortOrder sort_order = SortOrder::kAscending) @@ -247,8 +272,8 @@ class HybridScanChoices : public ScanChoices { const KeyBytes& lower_doc_key, const KeyBytes& upper_doc_key, bool is_forward_scan, - const std::vector& range_options_col_ids, - const std::shared_ptr>& range_options, + const std::vector& options_col_ids, + const std::shared_ptr>& options, const std::vector& range_bounds_col_ids, const QLScanRange* range_bounds, const ColGroupHolder& col_groups, @@ -278,23 +303,31 @@ class HybridScanChoices : public ScanChoices { // incrementing the option index for an OptionList. Will handle overflow by setting current // index to 0 and incrementing the previous index instead. If it overflows at first index // it means we are done, so it clears the scan target idxs array. 
- Status IncrementScanTargetAtOptionList(int start_option_list_idx); + Status IncrementScanTargetAtOptionList(ssize_t start_option_list_idx); // Utility function for testing std::vector TEST_GetCurrentOptions(); + Result ValidateHashGroup(const KeyBytes& scan_target) const; private: + // Utility method to return a column corresponding to idx in the schema. + // This may be different from schema.column_id in the presence of the hash_code column. + ColumnId GetColumnId(const Schema& schema, size_t idx) const; + + Status DecodeKey(DocKeyDecoder* decoder, KeyEntryValue* target_value = nullptr) const; + // Returns an iterator reference to the lowest option in the current search // space of this option list index. See comment for OptionRange. - std::vector::const_iterator GetSearchSpaceLowerBound(size_t opt_list_idx); + std::vector::const_iterator GetSearchSpaceLowerBound(size_t opt_list_idx) const; // Returns an iterator reference to the exclusive highest option range in the current search // space of this option list index. See comment for OptionRange. - std::vector::const_iterator GetSearchSpaceUpperBound(size_t opt_list_idx); + std::vector::const_iterator GetSearchSpaceUpperBound(size_t opt_list_idx) const; // Gets the option range that corresponds to the given option index at the given // option list index. - std::vector::const_iterator GetOptAtIndex(size_t opt_list_idx, size_t opt_index); + std::vector::const_iterator GetOptAtIndex(size_t opt_list_idx, + size_t opt_index) const; // Sets the option that corresponds to the given option index at the given // logical option list index. @@ -308,8 +341,8 @@ class HybridScanChoices : public ScanChoices { // The following fields aid in the goal of iterating through all possible // scan key values based on given IN-lists and range filters. 
- // The following encodes the list of ranges we are iterating over - std::vector> range_cols_scan_options_; + // The following encodes the list of option we are iterating over + std::vector> scan_options_; // Vector of references to currently active elements being used // in range_cols_scan_options_ diff --git a/src/yb/master/yql_virtual_table.cc b/src/yb/master/yql_virtual_table.cc index a535a16bec26..a38aa5238e33 100644 --- a/src/yb/master/yql_virtual_table.cc +++ b/src/yb/master/yql_virtual_table.cc @@ -87,6 +87,7 @@ Status YQLVirtualTable::BuildYQLScanSpec( return STATUS(IllegalState, "system table contains no static columns"); } spec->reset(new QLScanSpec( + schema, request.has_where_expr() ? &request.where_expr().condition() : nullptr, request.has_if_expr() ? &request.if_expr().condition() : nullptr, request.is_forward_scan())); diff --git a/src/yb/util/col_group.h b/src/yb/util/col_group.h index 01bbf72a7cf1..86bdb0d77149 100644 --- a/src/yb/util/col_group.h +++ b/src/yb/util/col_group.h @@ -11,8 +11,7 @@ // under the License. // -#ifndef YB_UTIL_COL_GROUP_H -#define YB_UTIL_COL_GROUP_H +#pragma once #include #include @@ -50,7 +49,7 @@ class ColGroupHolder { // Returns a reference to the group idx is a part of. idx must have been // added to a group in this ColGroupHolder. 
- const std::vector &GetGroup(size_t idx) { + const std::vector &GetGroup(size_t idx) const { DCHECK_LT(idx, col_to_group_.size()); DCHECK_GE(col_to_group_[idx], 0); DCHECK_LT(col_to_group_[idx], groups_.size()); @@ -65,5 +64,3 @@ class ColGroupHolder { }; }; // namespace yb - -#endif // YB_UTIL_COL_GROUP_H diff --git a/src/yb/yql/pggate/pg_dml_read.cc b/src/yb/yql/pggate/pg_dml_read.cc index dcf2dc9fb0bf..f38ab7288806 100644 --- a/src/yb/yql/pggate/pg_dml_read.cc +++ b/src/yb/yql/pggate/pg_dml_read.cc @@ -189,6 +189,8 @@ Status PgDmlRead::ProcessEmptyPrimaryBinds() { // Collecting column indexes that are involved in a tuple std::vector tuple_col_ids; + bool preceding_key_column_missed = false; + for (size_t index = 0; index != bind_->num_hash_key_columns(); ++index) { auto expr = bind_.ColumnForIndex(index).bind_pb(); auto colid = bind_.ColumnForIndex(index).id(); @@ -204,6 +206,9 @@ Status PgDmlRead::ProcessEmptyPrimaryBinds() { } if (expr && expr->has_condition()) { + // Move any range column binds into the 'condition_expr' field if + // we are batching hash columns. 
+ preceding_key_column_missed = pg_session_->IsHashBatchingEnabled(); const auto& lhs = *expr->condition().operands().begin(); if (lhs.has_tuple()) { const auto& tuple = lhs.tuple(); @@ -217,8 +222,6 @@ Status PgDmlRead::ProcessEmptyPrimaryBinds() { SCHECK(!has_partition_columns || !miss_partition_columns, InvalidArgument, "Partition key must be fully specified"); - bool preceding_key_column_missed = false; - if (miss_partition_columns) { VLOG(1) << "Full scan is needed"; read_req_->mutable_partition_column_values()->clear(); diff --git a/src/yb/yql/pggate/pg_doc_op.cc b/src/yb/yql/pggate/pg_doc_op.cc index 1576c758650a..971816428005 100644 --- a/src/yb/yql/pggate/pg_doc_op.cc +++ b/src/yb/yql/pggate/pg_doc_op.cc @@ -18,6 +18,7 @@ #include #include "yb/common/row_mark.h" +#include "yb/common/ybc_util.h" #include "yb/gutil/casts.h" #include "yb/gutil/strings/escaping.h" @@ -630,13 +631,150 @@ bool PgDocReadOp::GetNextPermutation(std::vector* p } return true; } +void PgDocReadOp::BindExprsRegular( + LWPgsqlReadRequestPB* read_req, + const std::vector& hashed_values, + const std::vector& range_values) { + DCHECK(!IsHashBatchingPossible()); + size_t index = 0; + // Bind all hash column values. + for (auto& partition_column : *read_req->mutable_partition_column_values()) { + partition_column = *hashed_values[index++]; + } + + // Deal with any range columns that are in this tuple IN. 
+ // Create an equality condition for each column + for (auto range_idx : permutation_range_column_indexes_) { + auto* pgexpr = range_values[range_idx - table_->num_hash_key_columns()]; + DCHECK(pgexpr != nullptr); + + read_req->mutable_condition_expr()->mutable_condition()->set_op(QL_OP_AND); + auto* op = read_req->mutable_condition_expr()->mutable_condition()->add_operands(); + auto* pgcond = op->mutable_condition(); + pgcond->set_op(QL_OP_EQUAL); + pgcond->add_operands()->set_column_id(table_.ColumnForIndex(range_idx).id()); + *pgcond->add_operands() = *pgexpr; + } +} + +void PgDocReadOp::BindExprsToBatch( + const std::vector& hashed_values, + const std::vector& range_values) { + auto partition_key = table_->partition_schema().EncodePgsqlHash(hashed_values); + DCHECK(partition_key.ok()); + auto partition = client::FindPartitionStartIndex(table_->GetPartitionList(), *partition_key); + + if (hash_in_conds_.empty()) { + hash_in_conds_.resize(table_->GetPartitionListSize(), nullptr); + } + + // Identify the vector to which the key should be added. + if (!hash_in_conds_[partition]) { + hash_in_conds_[partition] = read_op_->arena().NewArenaObject(); + PrepareInitialHashConditionList(partition); + } + + DCHECK(hash_in_conds_[partition]->has_condition()); + auto* op = hash_in_conds_[partition]->mutable_condition()->mutable_operands(); + auto* rhs_values_list = op->back().mutable_value()->mutable_list_value(); + + // Add new hash keys to the end of the array in the format (yb_hash_code, hk1, hk2 .... 
hkn) + auto* tup_elements = rhs_values_list->add_elems()->mutable_tuple_value(); + auto* new_elem = tup_elements->add_elems(); + new_elem->set_int32_value(table_->partition_schema().DecodeMultiColumnHashValue(*partition_key)); + for (auto elem : hashed_values) { + auto new_elem = tup_elements->add_elems(); + if(elem->has_value()) { + DCHECK(elem->has_value()); + *new_elem = elem->value(); + } + } + + for (auto range_idx : permutation_range_column_indexes_) { + auto* elem = range_values[range_idx - table_->num_hash_key_columns()]; + DCHECK(elem != nullptr); + if(elem->has_value()) { + auto* new_elem = tup_elements->add_elems(); + *new_elem = elem->value(); + } + } +} + +bool PgDocReadOp::HasNextBatch() { + while (next_batch_partition_ < hash_in_conds_.size() && + hash_in_conds_[next_batch_partition_] == nullptr) { + next_batch_partition_++; + } + return next_batch_partition_ < hash_in_conds_.size(); +} + +static bool IsHashInCondition(const LWPgsqlExpressionPB& cond) { + if (cond.has_condition() && cond.condition().op() == QL_OP_IN) { + auto first_elem = cond.condition().operands().begin(); + return first_elem->has_tuple() && + first_elem->tuple().elems().begin()->has_column_id() && + first_elem->tuple().elems().begin()->column_id() == kYbHashCodeColId; + } + return false; +} + +Result PgDocReadOp::BindNextBatchToRequest(LWPgsqlReadRequestPB* read_req) { + DCHECK(read_req != nullptr); + + if (!HasNextBatch()) { + return true; + } + + auto current_partition = next_batch_partition_; + + auto* cond = hash_in_conds_[current_partition]; + + LWPgsqlExpressionPB* cond_bind_expr = nullptr; + + if (!read_req->has_condition_expr()) { + read_req->mutable_condition_expr()->mutable_condition()->set_op(QL_OP_AND); + cond_bind_expr = read_req->mutable_condition_expr()->mutable_condition()->add_operands(); + } else { + bool hash_in_found = false; + for (auto& cond_it : + *read_req->mutable_condition_expr()->mutable_condition()->mutable_operands()) { + // See if it already has a 
hash IN condition from a previous binding and clear it. + if (IsHashInCondition(cond_it)) { + cond_it.clear_condition(); + cond_bind_expr = &cond_it; + hash_in_found = true; + break; + } + } -void PgDocReadOp::BindPermutation(const std::vector& exprs, - LWPgsqlReadRequestPB* read_req) const { + if (!hash_in_found) { + cond_bind_expr = read_req->mutable_condition_expr()->mutable_condition()->add_operands(); + } + } + + cond_bind_expr->CopyFrom(*cond); + read_req->clear_partition_column_values(); + + VERIFY_RESULT(SetLowerUpperBound(read_req, current_partition)); + + next_batch_partition_++; + return false; +} + +bool PgDocReadOp::IsHashBatchingPossible() { + if (PREDICT_FALSE(!is_hash_batched_.has_value())) { + is_hash_batched_ = pg_session_->IsHashBatchingEnabled() && + total_permutation_count_ < FLAGS_ysql_hash_batch_permutation_limit; + } + return *is_hash_batched_; +} + +void PgDocReadOp::BindPermutation(const std::vector& exprs, + LWPgsqlReadRequestPB* read_req) { const size_t hash_column_count = table_->num_hash_key_columns(); - std::vector hash_exprs(hash_column_count, + std::vector hash_exprs(hash_column_count, nullptr); - std::vector> range_exprs; + std::vector range_exprs(table_->num_range_key_columns(), nullptr); auto cond_iter = read_op_->read_request().partition_column_values().begin(); size_t index = 0; for (auto* expr : exprs) { @@ -659,7 +797,7 @@ void PgDocReadOp::BindPermutation(const std::vector if (tup_c_idx < hash_column_count) { hash_exprs[tup_c_idx] = pgexpr; } else { - range_exprs.emplace_back(tup_c_idx, pgexpr); + range_exprs[tup_c_idx - hash_column_count] = pgexpr; } val_it++; } @@ -671,44 +809,82 @@ void PgDocReadOp::BindPermutation(const std::vector index++; } - index = 0; - // Bind all hash column values. - for (auto it = read_req->mutable_partition_column_values()->begin(); - it != read_req->mutable_partition_column_values()->end(); - ++it) { - *it = *hash_exprs[index++]; - } - - // Deal with any range columns that are in this tuple IN. 
- // Create an equality condition for each column - for (auto [c_idx, pgexpr] : range_exprs) { - read_req->mutable_condition_expr()->mutable_condition()->set_op(QL_OP_AND); - auto* op = read_req->mutable_condition_expr()->mutable_condition()->add_operands(); - auto* pgcond = op->mutable_condition(); - pgcond->set_op(QL_OP_EQUAL); - pgcond->add_operands()->set_column_id(table_.ColumnForIndex(c_idx).id()); - *pgcond->add_operands() = *pgexpr; + if (IsHashBatchingPossible()) { + BindExprsToBatch(hash_exprs, range_exprs); + } else { + BindExprsRegular(read_req, hash_exprs, range_exprs); } } -bool PgDocReadOp::PopulateNextHashPermutationOps() { +Result PgDocReadOp::PopulateNextHashPermutationOps() { InitializeHashPermutationStates(); std::vector current_permutation; current_permutation.reserve(partition_exprs_.size()); - // Fill inactive operators with new hash permutations. + // Bind new hash permutations. + while (HasNextPermutation()) { - auto* read_req = PrepareReadReq(); - if (!read_req) { + LWPgsqlReadRequestPB* read_req = nullptr; + if (!IsHashBatchingPossible() && + (read_req = PrepareReadReq()) == nullptr) { return false; } current_permutation.clear(); GetNextPermutation(¤t_permutation); - BindPermutation(current_permutation, read_req); + + // read_req is nullptr if batching is on as we don't want to + // bind the permutation to a request yet. We instead want to bind + // it to one of the batches we are building up. + BindPermutation(current_permutation, read_req); + } + + // If batching is enabled, we have built up all our batches and now + // we can bind each batch to a request. + if (IsHashBatchingPossible()) { + LWPgsqlReadRequestPB* read_req = nullptr; + while (HasNextBatch()) { + read_req = PrepareReadReq(); + if (!read_req) { + return false; + } + VERIFY_RESULT(BindNextBatchToRequest(read_req)); + } } + + MoveInactiveOpsOutside(); return true; } +// Add the list of column ids as a tuple. 
Subsequently every tuple of hash key columns that are +// added will be in the same order as the column ids. +void PgDocReadOp::PrepareInitialHashConditionList(size_t partition) { + DCHECK(hash_in_conds_[partition] != nullptr); + hash_in_conds_[partition]->mutable_condition()->set_op(QL_OP_IN); + auto* add_targets = + hash_in_conds_[partition]->mutable_condition()->add_operands()->mutable_tuple(); + + // YB columns adheres to the following order --> hash key columns and then range key columns + // We also follow the same order. However, each hash key tuple also contains the yb_hash_code + // as the first element as docDB scans require that. Hence, in order to match that column id + // we add the column id of yb_hash_code. However, yb_hash_code has no column id. And hence we + // assign kFirstColumnIdRep -1 as the column id for yb_hash_code. + auto* yb_hash_code = add_targets->add_elems(); + yb_hash_code->set_column_id(kYbHashCodeColId); + for (size_t i = 0; i < table_->num_hash_key_columns(); i++) { + auto& col = table_.ColumnForIndex(i); + auto* hash_column = add_targets->add_elems(); + hash_column->set_column_id(col.id()); + } + for (auto c_idx : permutation_range_column_indexes_) { + auto& col = table_.ColumnForIndex(c_idx); + auto* range_column = add_targets->add_elems(); + range_column->set_column_id(col.id()); + } + + // RHS of IN condition + hash_in_conds_[partition]->mutable_condition()->add_operands(); +} + // Collect hash expressions to prepare for generating permutations. void PgDocReadOp::InitializeHashPermutationStates() { // Return if state variables were initialized. @@ -719,13 +895,27 @@ void PgDocReadOp::InitializeHashPermutationStates() { } // Initialize partition_exprs_. - // Reorganize the input arguments from Postgres to prepre for permutation generation. + // Reorganize the input arguments from Postgres to prepare for permutation generation. 
auto hash_column_count = table_->num_hash_key_columns(); partition_exprs_.resize(hash_column_count); auto c_idx = 0; for (const auto& col_expr : read_op_->read_request().partition_column_values()) { if (col_expr.has_condition()) { - auto it = ++col_expr.condition().operands().begin(); + auto it = col_expr.condition().operands().begin(); + + // Populate permutation_range_column_indexes_. + if (it->has_tuple()) { + for (const auto& lhs_elem : it->tuple().elems()) { + // Get the value for this column in the tuple. + size_t tup_c_idx = lhs_elem.column_id() - table_->schema().first_column_id(); + + if (tup_c_idx >= hash_column_count) { + permutation_range_column_indexes_.emplace_back(tup_c_idx); + } + } + } + + ++it; for (const auto& expr : it->condition().operands()) { partition_exprs_[c_idx].push_back(&expr); } @@ -735,6 +925,9 @@ void PgDocReadOp::InitializeHashPermutationStates() { ++c_idx; } + // Make sure permutation_range_column_indexes_ is ordered. + std::sort(permutation_range_column_indexes_.begin(), permutation_range_column_indexes_.end()); + // Calculate the total number of permutations to be generated. total_permutation_count_ = 1; for (const auto& exprs : partition_exprs_) { @@ -751,16 +944,23 @@ void PgDocReadOp::InitializeHashPermutationStates() { // TODO(neil) The control variable "ysql_request_limit" should be applied to ALL statements, but // at the moment, the number of operators never exceeds the number of tablets except for hash // permutation operation, so the work on this GFLAG can be done when it is necessary. - auto max_op_count = std::min(total_permutation_count_, - implicit_cast(FLAGS_ysql_request_limit)); + auto max_op_count = + std::min(IsHashBatchingPossible() ? + static_cast(table_->GetPartitionListSize()) : total_permutation_count_, + implicit_cast(FLAGS_ysql_request_limit)); ClonePgsqlOps(max_op_count); // Clear the original partition expressions as it will be replaced with hash permutations. 
for (size_t op_index = 0; op_index < max_op_count; ++op_index) { auto& read_request = GetReadReq(op_index); read_request.mutable_partition_column_values()->clear(); - for (size_t i = 0; i < hash_column_count; ++i) { - read_request.add_partition_column_values(); + if (IsHashBatchingPossible()) { + read_request.clear_hash_code(); + read_request.clear_max_hash_code(); + } else { + for (size_t i = 0; i < hash_column_count; ++i) { + read_request.add_partition_column_values(); + } } pgsql_ops_[op_index]->set_active(false); } diff --git a/src/yb/yql/pggate/pg_doc_op.h b/src/yb/yql/pggate/pg_doc_op.h index 42d1d5aa5070..cff68d0d44f2 100644 --- a/src/yb/yql/pggate/pg_doc_op.h +++ b/src/yb/yql/pggate/pg_doc_op.h @@ -493,6 +493,8 @@ class PgDocReadOp : public PgDocOp { // - After being queried from inner select, ybctids are used for populate request for outer query. void InitializeYbctidOperators(); + bool IsHashBatchingPossible(); + // Create operators by partition arguments. // - Optimization for statement: // SELECT ... WHERE IN @@ -501,9 +503,46 @@ class PgDocReadOp : public PgDocOp { // - When an operator is assigned a hash permutation, it is marked as active to be executed. // - When an operator completes the execution, it is marked as inactive and available for the // exection of the next hash permutation. - bool PopulateNextHashPermutationOps(); + Result PopulateNextHashPermutationOps(); void InitializeHashPermutationStates(); + // Binds the given hash and range values to the given read request. + // hashed_values and range_values have the same descriptions as in BindExprsToBatch. + void BindExprsRegular( + LWPgsqlReadRequestPB* read_req, + const std::vector& hashed_values, + const std::vector& range_values); + + // Helper functions for when we are batching hash permutations where + // we are creating an IN condition of the form + // (yb_hash_code(hashkeys), h1, h2, ..., hn) IN (tuple_1, tuple_2, tuple_3, ...) 
+ + // This operates by creating one such IN condition for each partition we are sending + our query to and building each of them up in hash_in_conds_[partition_idx]. We make sure + that each permutation value goes into the correct partition's condition. We then + make one read op clone per partition and for each one we bind their respective condition + from hash_in_conds_[partition_idx]. + + // This prepares the LHS of the hash IN condition for a particular partition. + void PrepareInitialHashConditionList(size_t partition); + + // Binds the given hash and range values to whatever partition in hash_in_conds_ + // the hashed values suggest. The range_values vector is expected to be a + // vector of size num_range_keys where a range_values[i] == nullptr iff + // the ith range column is not relevant to the IN condition we are building + // up. + void BindExprsToBatch( + const std::vector& hashed_values, + const std::vector& range_values); + + // These functions are used to iterate over each partition batch and bind them to a request. + + // Returns false if we are done iterating over our partition batches. + bool HasNextBatch(); + + // Binds the next partition batch available to the given request's condition expression. + Result BindNextBatchToRequest(LWPgsqlReadRequestPB* read_req); + // Helper functions for PopulateNextHashPermutationOps // Prepares a new read request from the pool of inactive operators. LWPgsqlReadRequestPB* PrepareReadReq(); @@ -513,7 +552,7 @@ class PgDocReadOp : public PgDocOp { bool GetNextPermutation(std::vector* exprs); // Binds a given permutation of partition expressions to the given read request. void BindPermutation(const std::vector& exprs, - LWPgsqlReadRequestPB* read_op) const; + LWPgsqlReadRequestPB* read_op); // Create operators by partitions. // - Optimization for aggregating or filtering requests.
@@ -579,6 +618,30 @@ class PgDocReadOp : public PgDocOp { //----------------------------------- Data Members ----------------------------------------------- + // Whether or not we are using hash permutation batching for this op. + boost::optional is_hash_batched_; + + // Pointer to the per tablet hash component condition expression. For each hash key + // combination, once we identify the partition at which it should be executed, we enqueue + // it into this vector among the other hash keys that are to be executed in that tablet. + // This vector contains a reference to the vector that contains the list of hash keys + // that are supposed to be executed in each tablet. + + // This is a vector of IN condition expressions for each partition. In each partition's + // condition expression the LHS is a tuple of the hash code and hash keys and the RHS + // is built up to eventually be a list of all the hash key permutations + // that belong to that partition. These conditions are eventually bound + // to a read op's condition expression. + std::vector hash_in_conds_; + + // Sometimes in the final hash IN condition's LHS, range columns may be involved. + // For example, if we get a filter of the form (h1, h2, r2) IN (t2, t2, t3), commonly found + // in the case of batched nested loop joins. These should be sorted. + std::vector permutation_range_column_indexes_; + + // Used when iterating over the partition batches in hash_in_conds_. + size_t next_batch_partition_ = 0; + // Template operation, used to fill in pgsql_ops_ by either assigning or cloning. 
PgsqlReadOpPtr read_op_; diff --git a/src/yb/yql/pggate/pg_session.cc b/src/yb/yql/pggate/pg_session.cc index a4d3b584d93e..83c5279d9f43 100644 --- a/src/yb/yql/pggate/pg_session.cc +++ b/src/yb/yql/pggate/pg_session.cc @@ -568,6 +568,11 @@ PgIsolationLevel PgSession::GetIsolationLevel() { return pg_txn_manager_->GetPgIsolationLevel(); } +bool PgSession::IsHashBatchingEnabled() { + return yb_enable_hash_batch_in && + GetIsolationLevel() != PgIsolationLevel::SERIALIZABLE; +} + Result PgSession::IsInitDbDone() { return pg_client_.IsInitDbDone(); } diff --git a/src/yb/yql/pggate/pg_session.h b/src/yb/yql/pggate/pg_session.h index 1041b85699d5..74afaa9e22fb 100644 --- a/src/yb/yql/pggate/pg_session.h +++ b/src/yb/yql/pggate/pg_session.h @@ -220,6 +220,8 @@ class PgSession : public RefCountedThreadSafe { PgIsolationLevel GetIsolationLevel(); + bool IsHashBatchingEnabled(); + // Run (apply + flush) list of given operations to read and write database content. template struct TableOperation { diff --git a/src/yb/yql/pggate/pggate_flags.cc b/src/yb/yql/pggate/pggate_flags.cc index 5a7de96fb9fc..d93a95554222 100644 --- a/src/yb/yql/pggate/pggate_flags.cc +++ b/src/yb/yql/pggate/pggate_flags.cc @@ -39,6 +39,9 @@ DEFINE_UNKNOWN_int32(pggate_tserver_shm_fd, -1, DEFINE_test_flag(bool, pggate_ignore_tserver_shm, false, "Ignore the shared memory of the local tablet server."); +DEFINE_UNKNOWN_uint64(ysql_hash_batch_permutation_limit, 100000, + "Maximum number of hash permutations allowed in hash batching mode."); + DEFINE_UNKNOWN_int32(ysql_request_limit, 1024, "Maximum number of requests to be sent at once"); diff --git a/src/yb/yql/pggate/pggate_flags.h b/src/yb/yql/pggate/pggate_flags.h index 60ed7ddf4b80..8214774909ff 100644 --- a/src/yb/yql/pggate/pggate_flags.h +++ b/src/yb/yql/pggate/pggate_flags.h @@ -21,6 +21,7 @@ DECLARE_int32(pggate_ybclient_reactor_threads); DECLARE_string(pggate_master_addresses); DECLARE_int32(pggate_tserver_shm_fd); DECLARE_int32(ysql_request_limit); 
+DECLARE_uint64(ysql_hash_batch_permutation_limit); DECLARE_uint64(ysql_prefetch_limit); DECLARE_double(ysql_backward_prefetch_scale_factor); DECLARE_uint64(ysql_session_max_batch_size); diff --git a/src/yb/yql/pgwrapper/pg_wrapper.cc b/src/yb/yql/pgwrapper/pg_wrapper.cc index 3836b5ca3c62..3e737290ca71 100644 --- a/src/yb/yql/pgwrapper/pg_wrapper.cc +++ b/src/yb/yql/pgwrapper/pg_wrapper.cc @@ -150,6 +150,9 @@ DEFINE_RUNTIME_AUTO_PG_FLAG(bool, yb_enable_expression_pushdown, kLocalVolatile, DEFINE_RUNTIME_AUTO_PG_FLAG(bool, yb_pushdown_strict_inequality, kLocalVolatile, false, true, "Push down strict inequality filters"); +DEFINE_RUNTIME_AUTO_PG_FLAG(bool, yb_enable_hash_batch_in, kLocalVolatile, false, true, + "Enable batching of hash in queries."); + DEFINE_RUNTIME_AUTO_PG_FLAG(bool, yb_bypass_cond_recheck, kLocalVolatile, false, true, "Bypass index condition recheck at the YSQL layer if the condition was pushed down.");