diff --git a/src/tests/sqlsmith/README.md b/src/tests/sqlsmith/README.md
index 4ced2616990c2..0488396219998 100644
--- a/src/tests/sqlsmith/README.md
+++ b/src/tests/sqlsmith/README.md
@@ -15,6 +15,13 @@ This test will be run as a unit test:
./risedev test -E "package(risingwave_sqlsmith)" --features enable_sqlsmith_unit_test
```
+## Generate snapshots
+
+Take a look at [`gen_queries.sh`](scripts/gen_queries.sh).
+
+Caveat: Even with a given snapshot, certain parts of the system are non-determninistic.
+For instance with scheduler errors, the same query may not trigger errors when executed.
+
## E2E
In the second mode, it will test the entire query handling end-to-end. We provide a CLI tool that represents a Postgres client. You can run this tool via:
diff --git a/src/tests/sqlsmith/src/lib.rs b/src/tests/sqlsmith/src/lib.rs
index 232e0b75ba1e4..6de404a09bb5b 100644
--- a/src/tests/sqlsmith/src/lib.rs
+++ b/src/tests/sqlsmith/src/lib.rs
@@ -42,7 +42,6 @@ pub fn sql_gen(rng: &mut impl Rng, tables: Vec
) -> String {
}
/// Generate `INSERT`
-#[allow(dead_code)]
pub fn insert_sql_gen(rng: &mut impl Rng, tables: Vec, count: usize) -> Vec {
let mut gen = SqlGenerator::new(rng, vec![]);
tables
diff --git a/src/tests/sqlsmith/src/runner.rs b/src/tests/sqlsmith/src/runner.rs
index b0151089b613c..7877bcf360c4f 100644
--- a/src/tests/sqlsmith/src/runner.rs
+++ b/src/tests/sqlsmith/src/runner.rs
@@ -155,15 +155,16 @@ pub async fn run(client: &Client, testdata: &str, count: usize, seed: Option) -> impl Rng {
}
}
-#[allow(dead_code)]
async fn populate_tables(
client: &Client,
rng: &mut R,
@@ -274,7 +274,6 @@ async fn test_session_variable(client: &Client, rng: &mut R) -> String {
}
/// Expects at least 50% of inserted rows included.
-#[allow(dead_code)]
async fn test_population_count(client: &Client, base_tables: Vec, expected_count: usize) {
let mut actual_count = 0;
for t in base_tables {
diff --git a/src/tests/sqlsmith/src/sql_gen/expr.rs b/src/tests/sqlsmith/src/sql_gen/expr.rs
index 1447f6ea2ced5..e5de9bc93e0b8 100644
--- a/src/tests/sqlsmith/src/sql_gen/expr.rs
+++ b/src/tests/sqlsmith/src/sql_gen/expr.rs
@@ -239,12 +239,9 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
let casts = EXPLICIT_CAST_TABLE.get(ret)?;
let cast_sig = casts.choose(&mut self.rng).unwrap();
- use CastContext as T;
match cast_sig.context {
- T::Explicit => {
- let expr = self
- .gen_expr(&cast_sig.from_type, context.set_inside_explicit_cast())
- .into();
+ CastContext::Explicit => {
+ let expr = self.gen_expr(&cast_sig.from_type, context).into();
let data_type = data_type_to_ast_data_type(&cast_sig.to_type);
Some(Expr::Cast { expr, data_type })
}
@@ -693,6 +690,9 @@ pub(crate) fn sql_null() -> Expr {
Expr::Value(Value::Null)
}
+// TODO(kwannoel):
+// Add variadic function signatures. Can add these functions
+// to a FUNC_TABLE too.
pub fn print_function_table() -> String {
let func_str = func_sigs()
.map(|sign| {
diff --git a/src/tests/sqlsmith/src/sql_gen/insert.rs b/src/tests/sqlsmith/src/sql_gen/insert.rs
index fe8bc29fe84a5..b5c2aa575fed5 100644
--- a/src/tests/sqlsmith/src/sql_gen/insert.rs
+++ b/src/tests/sqlsmith/src/sql_gen/insert.rs
@@ -21,7 +21,6 @@ use crate::sql_gen::SqlGenerator;
use crate::Table;
impl<'a, R: Rng> SqlGenerator<'a, R> {
- #[allow(dead_code)]
pub(crate) fn gen_insert_stmt(&mut self, table: Table, row_count: usize) -> Statement {
let table_name = ObjectName(vec![table.name.as_str().into()]);
let data_types = table
diff --git a/src/tests/sqlsmith/src/sql_gen/mod.rs b/src/tests/sqlsmith/src/sql_gen/mod.rs
index 11b73021e306f..2a73d791fe76d 100644
--- a/src/tests/sqlsmith/src/sql_gen/mod.rs
+++ b/src/tests/sqlsmith/src/sql_gen/mod.rs
@@ -78,16 +78,13 @@ pub(crate) struct SqlGeneratorContext {
// Used in top level, where we want to test queries
// without aggregates.
inside_agg: bool,
- inside_explicit_cast: bool,
}
-#[allow(dead_code)]
impl SqlGeneratorContext {
pub fn new() -> Self {
SqlGeneratorContext {
can_agg: true,
inside_agg: false,
- inside_explicit_cast: false,
}
}
@@ -95,7 +92,6 @@ impl SqlGeneratorContext {
Self {
can_agg,
inside_agg: false,
- inside_explicit_cast: false,
}
}
@@ -106,17 +102,6 @@ impl SqlGeneratorContext {
}
}
- pub fn set_inside_explicit_cast(self) -> Self {
- Self {
- inside_explicit_cast: true,
- ..self
- }
- }
-
- pub fn can_implicit_cast(self) -> bool {
- !self.inside_explicit_cast
- }
-
pub fn can_gen_agg(self) -> bool {
self.can_agg && !self.inside_agg
}
diff --git a/src/tests/sqlsmith/src/sql_gen/relation.rs b/src/tests/sqlsmith/src/sql_gen/relation.rs
index 86e57497bf532..3624d852fce41 100644
--- a/src/tests/sqlsmith/src/sql_gen/relation.rs
+++ b/src/tests/sqlsmith/src/sql_gen/relation.rs
@@ -19,17 +19,18 @@ use risingwave_sqlparser::ast::{
Ident, ObjectName, TableAlias, TableFactor, TableWithJoins, Value,
};
+use crate::sql_gen::types::BINARY_INEQUALITY_OP_TABLE;
use crate::sql_gen::{Column, SqlGenerator, SqlGeneratorContext};
use crate::{BinaryOperator, Expr, Join, JoinConstraint, JoinOperator, Table};
-fn create_equi_expr(left: String, right: String) -> Expr {
+fn create_binary_expr(op: BinaryOperator, left: String, right: String) -> Expr {
let left = Box::new(Expr::Identifier(Ident::new_unchecked(left)));
let right = Box::new(Expr::Identifier(Ident::new_unchecked(right)));
- Expr::BinaryOp {
- left,
- op: BinaryOperator::Eq,
- right,
- }
+ Expr::BinaryOp { left, op, right }
+}
+
+fn create_equi_expr(left: String, right: String) -> Expr {
+ create_binary_expr(BinaryOperator::Eq, left, right)
}
impl<'a, R: Rng> SqlGenerator<'a, R> {
@@ -39,13 +40,22 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
match self.rng.gen_range(0..=range) {
0..=0 => self.gen_simple_table(),
1..=1 => self.gen_time_window_func(),
- 2..=3 => self.gen_join_clause(),
- 4..=4 => self.gen_table_subquery(),
+ 2..=3 => self
+ .gen_simple_join_clause()
+ .unwrap_or_else(|| self.gen_simple_table()),
+ 4..=4 => self.gen_more_joins(),
+ 5..=5 => self.gen_table_subquery(),
+ // TODO(kwannoel): cycles, bushy joins.
_ => unreachable!(),
}
}
fn gen_simple_table(&mut self) -> (TableWithJoins, Vec) {
+ let (table_with_joins, table) = self.gen_simple_table_inner();
+ (table_with_joins, vec![table])
+ }
+
+ fn gen_simple_table_inner(&mut self) -> (TableWithJoins, Table) {
let (relation, _, table) = self.gen_simple_table_factor();
(
@@ -57,7 +67,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
)
}
- fn gen_simple_table_factor(&mut self) -> (TableFactor, Vec, Vec) {
+ fn gen_simple_table_factor(&mut self) -> (TableFactor, Vec, Table) {
let alias = self.gen_table_name_with_prefix("t");
let mut table = self.tables.choose(&mut self.rng).unwrap().clone();
let table_factor = TableFactor::Table {
@@ -70,10 +80,10 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
};
table.name = alias; // Rename the table.
let columns = table.get_qualified_columns();
- (table_factor, columns, vec![table])
+ (table_factor, columns, table)
}
- fn gen_table_factor(&mut self) -> (TableFactor, Vec, Vec) {
+ fn gen_table_factor(&mut self) -> (TableFactor, Vec, Table) {
let current_context = self.new_local_context();
let factor = self.gen_table_factor_inner();
self.restore_context(current_context);
@@ -82,97 +92,179 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
/// Generates a table factor, and provides bound columns.
/// Generated column names should be qualified by table name.
- fn gen_table_factor_inner(&mut self) -> (TableFactor, Vec, Vec) {
+ fn gen_table_factor_inner(&mut self) -> (TableFactor, Vec, Table) {
// TODO: TableFactor::Derived, TableFactor::TableFunction, TableFactor::NestedJoin
self.gen_simple_table_factor()
}
- /// TODO:
- /// Generate equi join with columns not of the same type,
- /// use functions to transform to same type.
fn gen_equi_join_columns(
&mut self,
left_columns: Vec,
right_columns: Vec,
- ) -> Option<(Column, Column)> {
+ ) -> Vec<(Column, Column)> {
let mut available_join_on_columns = vec![];
for left_column in &left_columns {
for right_column in &right_columns {
if left_column.data_type == right_column.data_type {
- available_join_on_columns.push((left_column, right_column))
+ available_join_on_columns.push((left_column.clone(), right_column.clone()))
}
}
}
+ available_join_on_columns
+ }
+
+ fn gen_bool_with_tables(&mut self, tables: Vec) -> Expr {
+ let old_context = self.new_local_context();
+ self.add_relations_to_context(tables);
+ let expr = self.gen_expr(&Boolean, SqlGeneratorContext::new_with_can_agg(false));
+ self.restore_context(old_context);
+ expr
+ }
+
+ fn gen_single_equi_join_expr(
+ &mut self,
+ left_columns: Vec,
+ right_columns: Vec,
+ ) -> Option<(Expr, Vec<(Column, Column)>)> {
+ let mut available_join_on_columns = self.gen_equi_join_columns(left_columns, right_columns);
if available_join_on_columns.is_empty() {
return None;
}
- let i = self.rng.gen_range(0..available_join_on_columns.len());
- let (left_column, right_column) = available_join_on_columns[i];
- Some((left_column.clone(), right_column.clone()))
+ available_join_on_columns.shuffle(&mut self.rng);
+ let remaining_columns = available_join_on_columns.split_off(1);
+ let (left_column, right_column) = available_join_on_columns.drain(..).next().unwrap();
+ let join_on_expr = create_equi_expr(left_column.name, right_column.name);
+ Some((join_on_expr, remaining_columns))
+ }
+
+ fn gen_non_equi_expr(&mut self, available_join_on_columns: Vec<(Column, Column)>) -> Expr {
+ let expr = Expr::Value(Value::Boolean(true));
+ if available_join_on_columns.is_empty() {
+ return expr;
+ }
+ let n = self.rng.gen_range(0..available_join_on_columns.len());
+ let mut count = 0;
+ for (l_col, r_col) in available_join_on_columns {
+ if count >= n {
+ break;
+ }
+ let Some(inequality_ops) = BINARY_INEQUALITY_OP_TABLE.get(&(l_col.data_type, r_col.data_type)) else {
+ continue;
+ };
+ let inequality_op = inequality_ops.choose(&mut self.rng).unwrap();
+ let _non_equi_expr = create_binary_expr(inequality_op.clone(), l_col.name, r_col.name);
+ count += 1;
+ }
+ expr
+ }
+
+ fn gen_more_equi_join_exprs(
+ &mut self,
+ mut available_join_on_columns: Vec<(Column, Column)>,
+ ) -> Expr {
+ let mut expr = Expr::Value(Value::Boolean(true));
+ if available_join_on_columns.is_empty() {
+ return expr;
+ }
+ let n_join_cols = available_join_on_columns.len();
+ let n = if n_join_cols < 2 {
+ n_join_cols
+ } else {
+ match self.rng.gen_range(0..100) {
+ 0..=10 => self.rng.gen_range(n_join_cols / 2..n_join_cols),
+ 11..=100 => self.rng.gen_range(0..n_join_cols / 2),
+ _ => unreachable!(),
+ }
+ };
+
+ for (l_col, r_col) in available_join_on_columns.drain(0..n) {
+ let equi_expr = create_equi_expr(l_col.name, r_col.name);
+ expr = Expr::BinaryOp {
+ left: Box::new(expr),
+ op: BinaryOperator::And,
+ right: Box::new(equi_expr),
+ }
+ }
+ expr
+ }
+
+ fn gen_arbitrary_bool(&mut self, left_table: Table, right_table: Table) -> Option {
+ let expr = self.gen_bool_with_tables(vec![left_table, right_table]);
+
+ // FIXME(noel): This is a hack to reduce streaming nested loop join occurrences.
+ // ... JOIN ON x=y AND false => ... JOIN ON x=y
+ // We can use const folding, then remove the right expression,
+ // if it evaluates to `false` after const folding.
+ // Have to first bind `Expr`, since it is AST form.
+ // Then if successfully bound, use `eval_row_const` to constant fold it.
+ // Take a look at .
+ if expr != Expr::Value(Value::Boolean(false)) {
+ Some(expr)
+ } else {
+ None
+ }
}
/// Generates the `ON` clause in `t JOIN t2 ON ...`
/// It will generate at least one equi join condition
/// This will reduce chance of nested loop join from being generated.
- /// TODO: Generate equi-join on different types.
fn gen_join_on_expr(
&mut self,
left_columns: Vec,
- left_table: Vec,
+ left_table: Table,
right_columns: Vec,
- right_table: Vec,
+ right_table: Table,
) -> Option {
// We always generate an equi join, to avoid stream nested loop join.
- let Some((l, r)) = self.gen_equi_join_columns(left_columns, right_columns) else {
- return None;
+ let Some((base_join_on_expr, remaining_equi_columns)) = self.gen_single_equi_join_expr(left_columns, right_columns) else {
+ return None
};
- let mut join_on_expr = create_equi_expr(l.name, r.name);
- // Add extra boolean expressions
- if self.flip_coin() {
- let old_context = self.new_local_context();
- self.add_relations_to_context(left_table);
- self.add_relations_to_context(right_table);
- let expr = self.gen_expr(&Boolean, SqlGeneratorContext::new_with_can_agg(false));
- self.restore_context(old_context);
- // FIXME(noel): Hack to reduce streaming nested loop join occurrences.
- // ... JOIN ON x=y AND false => ... JOIN ON x=y
- // We can use const folding, then remove the right expression,
- // if it evaluates to `false` after const folding.
- if expr != Expr::Value(Value::Boolean(false)) {
- join_on_expr = Expr::BinaryOp {
- left: Box::new(join_on_expr),
- op: BinaryOperator::And,
- right: Box::new(expr),
- }
- }
+
+ // Add more expressions
+ let extra_expr = match self.rng.gen_range(1..=100) {
+ 1..=25 => None,
+ 26..=50 => Some(self.gen_non_equi_expr(remaining_equi_columns)),
+ 51..=75 => Some(self.gen_more_equi_join_exprs(remaining_equi_columns)),
+ 76..=100 => self.gen_arbitrary_bool(left_table, right_table),
+ _ => unreachable!(),
+ };
+ if let Some(extra_expr) = extra_expr {
+ Some(Expr::BinaryOp {
+ left: Box::new(base_join_on_expr),
+ op: BinaryOperator::And,
+ right: Box::new(extra_expr),
+ })
+ } else {
+ Some(base_join_on_expr)
}
- Some(join_on_expr)
}
fn gen_join_constraint(
&mut self,
left_columns: Vec,
- left_table: Vec,
+ left_table: Table,
right_columns: Vec,
- right_table: Vec,
+ right_table: Table,
) -> Option {
let expr = self.gen_join_on_expr(left_columns, left_table, right_columns, right_table)?;
Some(JoinConstraint::On(expr))
}
/// Generates t1 JOIN t2 ON ...
- fn gen_join_clause(&mut self) -> (TableWithJoins, Vec) {
- let (left_factor, left_columns, mut left_table) = self.gen_table_factor();
- let (right_factor, right_columns, mut right_table) = self.gen_table_factor();
-
- let join_constraint = self.gen_join_constraint(
+ fn gen_join_operator(
+ &mut self,
+ left_columns: Vec,
+ left_table: Table,
+ right_columns: Vec,
+ right_table: Table,
+ ) -> Option {
+ let Some(join_constraint) = self.gen_join_constraint(
left_columns,
- left_table.clone(),
+ left_table,
right_columns,
- right_table.clone(),
- );
- let Some(join_constraint) = join_constraint else {
- return self.gen_simple_table();
+ right_table,
+ ) else {
+ return None;
};
// NOTE: INNER JOIN works fine, usually does not encounter `StreamNestedLoopJoin` much.
@@ -187,18 +279,67 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
// _ => JoinOperator::CrossJoin,
};
+ Some(join_operator)
+ }
+
+ /// Generates t1 JOIN t2 ON ...
+ fn gen_simple_join_clause(&mut self) -> Option<(TableWithJoins, Vec)> {
+ let (left_factor, left_columns, left_table) = self.gen_table_factor();
+ let (right_factor, right_columns, right_table) = self.gen_table_factor();
+ let Some(join_operator) =
+ self.gen_join_operator(left_columns, left_table.clone(), right_columns, right_table.clone()) else {
+ return None;
+ };
+
let right_factor_with_join = Join {
relation: right_factor,
join_operator,
};
- // TODO: Different structures of joins (bushy, left, right, cycles)
- left_table.append(&mut right_table);
- (
+ Some((
TableWithJoins {
relation: left_factor,
joins: vec![right_factor_with_join],
},
- left_table,
+ vec![left_table, right_table],
+ ))
+ }
+
+ fn gen_more_joins(&mut self) -> (TableWithJoins, Vec) {
+ // gen left
+ let Some((left_table_with_join, mut left_tables)) = self.gen_simple_join_clause() else {
+ return self.gen_simple_table();
+ };
+ let left_columns = left_tables
+ .iter()
+ .flat_map(|t| t.get_qualified_columns())
+ .collect();
+
+ // gen right
+ let (right_factor, right_columns, right_table) = self.gen_table_factor();
+
+ // gen join
+ let left_table = left_tables.choose(&mut self.rng).unwrap();
+ let Some(join_operator) =
+ self.gen_join_operator(left_columns, left_table.clone(), right_columns, right_table.clone()) else {
+ return (left_table_with_join, left_tables);
+ };
+
+ // build result
+ let mut tables = vec![];
+ tables.append(&mut left_tables);
+ tables.push(right_table);
+
+ let right_join = Join {
+ relation: right_factor,
+ join_operator,
+ };
+
+ (
+ TableWithJoins {
+ relation: TableFactor::NestedJoin(Box::new(left_table_with_join)),
+ joins: vec![right_join],
+ },
+ tables,
)
}
diff --git a/src/tests/sqlsmith/src/sql_gen/scalar.rs b/src/tests/sqlsmith/src/sql_gen/scalar.rs
index e79fdba9b4dcb..2383e361c4a51 100644
--- a/src/tests/sqlsmith/src/sql_gen/scalar.rs
+++ b/src/tests/sqlsmith/src/sql_gen/scalar.rs
@@ -108,7 +108,6 @@ impl<'a, R: Rng> SqlGenerator<'a, R> {
}
/// Generates a list of [`n`] simple scalar values of a specific [`type`].
- #[allow(dead_code)]
fn gen_simple_scalar_list(&mut self, ty: &DataType, n: usize) -> Vec {
(0..n).map(|_| self.gen_simple_scalar(ty)).collect()
}
diff --git a/src/tests/sqlsmith/src/sql_gen/types.rs b/src/tests/sqlsmith/src/sql_gen/types.rs
index 561b38d5c26ae..cc88a4095238f 100644
--- a/src/tests/sqlsmith/src/sql_gen/types.rs
+++ b/src/tests/sqlsmith/src/sql_gen/types.rs
@@ -25,7 +25,7 @@ use risingwave_expr::sig::agg::{agg_func_sigs, AggFuncSig as RwAggFuncSig};
use risingwave_expr::sig::cast::{cast_sigs, CastContext, CastSig as RwCastSig};
use risingwave_expr::sig::func::{func_sigs, FuncSign as RwFuncSig};
use risingwave_frontend::expr::ExprType;
-use risingwave_sqlparser::ast::{DataType as AstDataType, StructField};
+use risingwave_sqlparser::ast::{BinaryOperator, DataType as AstDataType, StructField};
pub(super) fn data_type_to_ast_data_type(data_type: &DataType) -> AstDataType {
match data_type {
@@ -244,3 +244,53 @@ pub(crate) static IMPLICIT_CAST_TABLE: LazyLock>>
.for_each(|cast| casts.entry(cast.to_type.clone()).or_default().push(cast));
casts
});
+
+fn expr_type_to_inequality_op(typ: ExprType) -> Option {
+ match typ {
+ ExprType::GreaterThan => Some(BinaryOperator::Gt),
+ ExprType::GreaterThanOrEqual => Some(BinaryOperator::GtEq),
+ ExprType::LessThan => Some(BinaryOperator::Lt),
+ ExprType::LessThanOrEqual => Some(BinaryOperator::LtEq),
+ ExprType::NotEqual => Some(BinaryOperator::NotEq),
+ _ => None,
+ }
+}
+
+/// Build set of binary inequality functions like `>`, `<`, etc...
+/// Maps from LHS, RHS argument to Inequality Operation
+/// For instance:
+/// GreaterThanOrEqual(Int16, Int64) -> Boolean
+/// Will store an entry of:
+/// Key: Int16, Int64
+/// Value: `BinaryOp::GreaterThanOrEqual`
+/// in the table.
+pub(crate) static BINARY_INEQUALITY_OP_TABLE: LazyLock<
+ HashMap<(DataType, DataType), Vec>,
+> = LazyLock::new(|| {
+ let mut funcs = HashMap::<(DataType, DataType), Vec>::new();
+ func_sigs()
+ .filter(|func| {
+ !FUNC_BAN_LIST.contains(&func.func)
+ && func.ret_type == DataTypeName::Boolean
+ && func.inputs_type.len() == 2
+ && func
+ .inputs_type
+ .iter()
+ .all(|t| *t != DataTypeName::Timestamptz)
+ })
+ .filter_map(|func| {
+ let Some(lhs) = data_type_name_to_ast_data_type(&func.inputs_type[0]) else {
+ return None;
+ };
+ let Some(rhs) = data_type_name_to_ast_data_type(&func.inputs_type[1]) else {
+ return None;
+ };
+ let args = (lhs, rhs);
+ let Some(op) = expr_type_to_inequality_op(func.func) else {
+ return None;
+ };
+ Some((args, op))
+ })
+ .for_each(|(args, op)| funcs.entry(args).or_default().push(op));
+ funcs
+});