[#7836] YSQL: Batching IN queries on Hash Keys
Summary: Before this change, an IN condition bound to hash key columns produced one request per possible value of the hash keys. For example, consider the query `SELECT * FROM sample_table WHERE h1 IN (1,4,6,8);` where sample_table has a primary index with `h1` as its full hash component. We sent 4 requests, one per IN-list element, of the form `SELECT * FROM sample_table WHERE h1 = 1;`, `SELECT * FROM sample_table WHERE h1 = 4;`, etc. If the IN condition were instead bound to a range column, we would send the entire filter at once as a single condition and issue just one request per partition of `sample_table_pkey`. We could not do the same for hash-column IN filters because the `DocRowwiseIterator` could not perform skip scans over hash columns, so it lacked the infrastructure to process IN conditions on hash columns.

This diff fixes the above issue by making IN conditions on hash columns behave similarly to those on range columns. To do this, we made the following changes:

- We adjusted pgsql/qlscanspec and ScanChoices to be able to carry out skip scans over hash-column IN conditions.
- We added infrastructure in pg_doc_op.h to convert IN filters of the form `h1 IN (v1,v2,...,vn)` into a condition expression of the form `(yb_hash_code(h1), h1) IN ((yb_hash_code(v1), v1), (yb_hash_code(v2), v2), (yb_hash_code(v3), v3), ..., (yb_hash_code(vn), vn))`. If the table has multiple hash partitions, we form one request per partition, and the RHS of the hash condition on each partition's request contains only the values from (v1,v2,...,vn) that are relevant to that partition.

This feature also works similarly for multicolumn hash keys. For now, it is disabled under the serializable isolation level, as there is no infrastructure to lock the multiple non-contiguous rows that such filters would require. Its enablement is controlled by the autoflag GUC `yb_enable_hash_batch_in`. We also added a tserver flag `ysql_hash_batch_permutation_limit` that caps the number of hash-key permutations a query may produce and still be eligible for this feature: for example, `h1 IN (v1,...,vn) AND h2 IN (w1,...,wm)` on a two-column hash key produces n × m permutations. Without this check, we could materialize an unbounded number of hash permutations in memory and cause an OOM crash.

Test Plan:
```
./yb_build.sh release --java-test org.yb.pgsql.TestPgRegressIndex
./yb_build.sh --java-test 'org.yb.pgsql.TestPgRegressHashInQueries'
```

Reviewers: smishra, neil, amartsinchyk, kpopali

Reviewed By: kpopali

Subscribers: mbautin, kpopali, kannan, ssong, yql, mihnea, bogdan

Differential Revision: https://phabricator.dev.yugabyte.com/D19672
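To make the rewrite concrete, here is a minimal SQL sketch of the behavior described above (the `r1` range column is an illustrative assumption, not taken from this diff):

```sql
-- sample_table with h1 as the full hash component of its primary key,
-- as in the example above (r1 is a hypothetical range column).
CREATE TABLE sample_table (h1 int, r1 int, PRIMARY KEY (h1 HASH, r1 ASC));

-- The autoflag GUC added by this diff controls the batching.
SET yb_enable_hash_batch_in = true;

-- Before this change, this query fanned out into four point requests:
--   WHERE h1 = 1; WHERE h1 = 4; WHERE h1 = 6; WHERE h1 = 8;
-- With batching, it is pushed down as one condition per hash partition:
--   (yb_hash_code(h1), h1) IN ((yb_hash_code(1), 1), (yb_hash_code(4), 4),
--                              (yb_hash_code(6), 6), (yb_hash_code(8), 8))
-- where each partition's request keeps only the values it owns.
SELECT * FROM sample_table WHERE h1 IN (1, 4, 6, 8);
```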
1 parent dbd16ee · commit fc57665 · Showing 43 changed files with 2,337 additions and 510 deletions.
187 changes: 187 additions & 0 deletions — java/yb-pgsql/src/test/java/org/yb/pgsql/TestPgRegressHashInQueries.java
// Copyright (c) YugaByte, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
// in compliance with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software distributed under the License
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// or implied. See the License for the specific language governing permissions and limitations
// under the License.
//

package org.yb.pgsql;

import org.junit.Test;
import org.junit.runner.RunWith;

import org.yb.util.YBTestRunnerNonTsanOnly;
import org.yb.util.RegexMatcher;

import java.sql.Connection;
import java.sql.Statement;

import java.util.HashSet;
import java.util.Set;

import static org.yb.AssertionWrappers.*;

@RunWith(value=YBTestRunnerNonTsanOnly.class)
public class TestPgRegressHashInQueries extends BasePgSQLTest {

  @Override
  protected Integer getYsqlRequestLimit() {
    // Set the request limit to 1 so that the number of storage roundtrips equals the number of
    // request operators created.
    return 1;
  }

  @Test
  public void testInQueryBatchingOnHashKey() throws Exception {
    String createTable = "CREATE TABLE t1 (a int PRIMARY KEY, b int) SPLIT INTO 3 TABLETS";
    String insertTable = "INSERT INTO t1 SELECT i, i FROM (SELECT generate_series(1, 1024) i) t";

    try (Statement statement = connection.createStatement()) {
      statement.execute(createTable);
      statement.execute(insertTable);
    }

    // Generate the select query needed to exercise batched IN:
    // SELECT * FROM t1 WHERE a IN (1, 2, 3, ..., 511, 512);
    int num_rows = 512;
    String query = "SELECT * FROM t1 WHERE a IN (";
    for (int i = 1; i < num_rows; ++i) {
      query += i + ", ";
    }
    query += num_rows + ")";
    Set<Row> expectedRows = new HashSet<>();
    for (int i = 1; i <= num_rows; i++) {
      expectedRows.add(new Row(i, i));
    }

    try (Statement statement = connection.createStatement()) {
      statement.execute("SET yb_enable_hash_batch_in = false");
      long noBatchingNumRequests = getNumStorageRoundtrips(statement, query);
      // Without batching, each of the 512 IN elements produces its own request.
      assertEquals(512, noBatchingNumRequests);
      assertRowSet(statement, query, expectedRows);

      statement.execute("SET yb_enable_hash_batch_in = true");
      long batchingNumRequests = getNumStorageRoundtrips(statement, query);
      assertRowSet(statement, query, expectedRows);
      // With batching, we send three requests because the table was created with three tablets.
      assertEquals(3, batchingNumRequests);
    }
  }

  @Test
  public void testInQueryBatchingOnMixedKey() throws Exception {
    String createTable =
        "CREATE TABLE t1 (a int, b int, PRIMARY KEY(a hash, b asc)) SPLIT INTO 3 TABLETS";
    String insertTable1 =
        "INSERT INTO t1 SELECT i, i FROM (SELECT generate_series(1, 1024) i) t";
    String insertTable2 =
        "INSERT INTO t1 SELECT i, i+1 FROM (SELECT generate_series(1, 1024) i) t";

    try (Statement statement = connection.createStatement()) {
      statement.execute(createTable);
      statement.execute(insertTable1);
      statement.execute(insertTable2);
    }

    // Generate a select query with IN conditions on both the hash and the range key:
    // SELECT * FROM t1 WHERE a IN (1, 2, ..., 512) AND b IN (2, 4, ..., 510, 512);
    int upper_limit = 512;
    String query = "SELECT * FROM t1 WHERE a IN (";
    for (int i = 1; i < upper_limit; ++i) {
      query += i + ", ";
    }
    query += upper_limit + ") AND b IN (";

    for (int i = 1; i < upper_limit; ++i) {
      if ((i % 2) == 0) {
        query += i + ", ";
      }
    }
    query += upper_limit + ")";

    // Since b must be even, odd values of a match rows from insertTable2 (b = a + 1) and even
    // values of a match rows from insertTable1 (b = a).
    Set<Row> expectedRows = new HashSet<>();
    for (int i = 1; i <= upper_limit; i++) {
      if ((i % 2) == 1) {
        expectedRows.add(new Row(i, i + 1));
      } else {
        expectedRows.add(new Row(i, i));
      }
    }

    try (Statement statement = connection.createStatement()) {
      statement.execute("SET yb_enable_hash_batch_in = false");
      long noBatchingNumRequests = getNumStorageRoundtrips(statement, query);
      // Without batching, each of the 512 hash-key values produces its own request.
      assertEquals(512, noBatchingNumRequests);
      assertRowSet(statement, query, expectedRows);

      statement.execute("SET yb_enable_hash_batch_in = true");
      long batchingNumRequests = getNumStorageRoundtrips(statement, query);
      assertRowSet(statement, query, expectedRows);
      // With batching, we send three requests because the table was created with three tablets.
      assertEquals(3, batchingNumRequests);
    }
  }

  @Test
  public void testInQueryBatchingNestLoopHashKey() throws Exception {
    String createTable1 = "CREATE TABLE x (a int PRIMARY KEY, b int) SPLIT INTO 3 TABLETS";
    String insertTable1 = "INSERT INTO x SELECT i*2, i FROM (SELECT generate_series(1, 4096) i) t";
    String createTable2 = "CREATE TABLE y (a int PRIMARY KEY, b int) SPLIT INTO 3 TABLETS";
    String insertTable2 = "INSERT INTO y SELECT i*5, i FROM (SELECT generate_series(1, 4096) i) t";

    try (Statement statement = connection.createStatement()) {
      statement.execute(createTable1);
      statement.execute(insertTable1);
      statement.execute(createTable2);
      statement.execute(insertTable2);
    }

    // Generate a nested loop join query and enable NL join batching with different batch sizes.
    // The batched outer keys get automatically converted to batched IN queries, so we expect the
    // best performance when IN batching is enabled.
    String query = "SELECT * FROM x t1 JOIN y t2 ON t1.a = t2.a";

    // The matching join keys are the multiples of 10 up to 8190, giving 819 rows.
    Set<Row> expectedRows = new HashSet<>();
    for (int i = 1; i <= 819; i++) {
      expectedRows.add(new Row(i * 10, i * 5, i * 10, i * 2));
    }

    try (Statement statement = connection.createStatement()) {
      // Force a batched nested loop join.
      statement.execute("SET enable_hashjoin = off");
      statement.execute("SET enable_mergejoin = off");
      statement.execute("SET enable_seqscan = off");
      statement.execute("SET enable_material = off");

      statement.execute("SET yb_bnl_batch_size = 3;");
      statement.execute("SET yb_enable_hash_batch_in = false");
      long noBatchingSmallBatchSizeNumRPCs = getNumStorageRoundtrips(statement, query);
      assertEquals(4102, noBatchingSmallBatchSizeNumRPCs);
      assertRowSet(statement, query, expectedRows);

      statement.execute("SET yb_bnl_batch_size = 1024;");
      statement.execute("SET yb_enable_hash_batch_in = false");
      long noBatchingLargeBatchSizeNumRPCs = getNumStorageRoundtrips(statement, query);
      assertEquals(4102, noBatchingLargeBatchSizeNumRPCs);
      assertRowSet(statement, query, expectedRows);

      statement.execute("SET yb_bnl_batch_size = 1024;");
      statement.execute("SET yb_enable_hash_batch_in = true");
      long batchingLargeBatchSizeNumRPCs = getNumStorageRoundtrips(statement, query);
      assertEquals(12, batchingLargeBatchSizeNumRPCs);
      assertRowSet(statement, query, expectedRows);
    }
  }

  @Test
  public void schedule() throws Exception {
    runPgRegressTest("yb_hash_in_schedule");
  }
}
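Outside the Java harness, the same effect can be observed from psql with YugabyteDB's distributed EXPLAIN. A rough sketch follows, assuming `EXPLAIN (ANALYZE, DIST)` is available in your build; its storage read request counters play the role of `getNumStorageRoundtrips` in the test above:

```sql
-- Mirrors testInQueryBatchingOnHashKey.
CREATE TABLE t1 (a int PRIMARY KEY, b int) SPLIT INTO 3 TABLETS;
INSERT INTO t1 SELECT i, i FROM generate_series(1, 1024) i;

SET yb_enable_hash_batch_in = false;
-- Expect roughly one storage read request per IN element.
EXPLAIN (ANALYZE, DIST) SELECT * FROM t1 WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8);

SET yb_enable_hash_batch_in = true;
-- Expect at most one storage read request per tablet (three here).
EXPLAIN (ANALYZE, DIST) SELECT * FROM t1 WHERE a IN (1, 2, 3, 4, 5, 6, 7, 8);
```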