From 688a4956c2f13f6310b35ffbcde342d95dd4c9db Mon Sep 17 00:00:00 2001 From: Xiangjin Date: Tue, 4 Apr 2023 14:58:27 +0800 Subject: [PATCH 1/2] fix(sqlparser): support more PostgreSQL `trim` syntax --- e2e_test/batch/functions/trim.slt.part | 45 +++++++++++++++++++++++++ src/frontend/src/binder/expr/mod.rs | 25 ++++++++------ src/sqlparser/src/ast/mod.rs | 25 ++++++++------ src/sqlparser/src/parser.rs | 31 ++++++++++++----- src/sqlparser/tests/sqlparser_common.rs | 2 +- src/tests/regress/data/sql/strings.sql | 10 +++--- src/tests/sqlsmith/src/sql_gen/expr.rs | 7 ++-- 7 files changed, 106 insertions(+), 39 deletions(-) diff --git a/e2e_test/batch/functions/trim.slt.part b/e2e_test/batch/functions/trim.slt.part index a9d9aef19b8a2..0ad8e03c219b9 100644 --- a/e2e_test/batch/functions/trim.slt.part +++ b/e2e_test/batch/functions/trim.slt.part @@ -13,6 +13,51 @@ select trim(trailing 'cba' from 'abcxyzabc'); ---- abcxyz +query T +select trim('cba' from 'abcxyzabc'); +---- +xyz + +query T +select trim(both from ' xyz '); +---- +xyz + +query T +select trim(from ' xyz '); +---- +xyz + +query T +select trim(both from 'abcxyzabc', 'cba'); +---- +xyz + +query T +select trim(both 'abcxyzabc', 'cba'); +---- +xyz + +query T +select trim(from 'abcxyzabc', 'cba'); +---- +xyz + +query T +select trim('abcxyzabc', 'cba'); +---- +xyz + +query T +select trim(both ' xyz '); +---- +xyz + +query T +select trim(' xyz '); +---- +xyz + query T select ltrim('abcxyzabc', 'bca'); ---- diff --git a/src/frontend/src/binder/expr/mod.rs b/src/frontend/src/binder/expr/mod.rs index 867eedcfd4848..e0ab4606d9572 100644 --- a/src/frontend/src/binder/expr/mod.rs +++ b/src/frontend/src/binder/expr/mod.rs @@ -111,7 +111,11 @@ impl Binder { time_zone, } => self.bind_at_time_zone(*timestamp, time_zone), // special syntaxt for string - Expr::Trim { expr, trim_where } => self.bind_trim(*expr, trim_where), + Expr::Trim { + expr, + trim_where, + trim_what, + } => self.bind_trim(*expr, trim_where, trim_what), Expr::Substring { expr, substring_from, @@ -243,21 +247,20 @@ impl Binder { pub(super) fn bind_trim( &mut self, expr: Expr, - // ([BOTH | LEADING | TRAILING], ) - trim_where: Option<(TrimWhereField, Box)>, + // BOTH | LEADING | TRAILING + trim_where: Option, + trim_what: Option>, ) -> Result { let mut inputs = vec![self.bind_expr(expr)?]; let func_type = match trim_where { - Some(t) => { - inputs.push(self.bind_expr(*t.1)?); - match t.0 { - TrimWhereField::Both => ExprType::Trim, - TrimWhereField::Leading => ExprType::Ltrim, - TrimWhereField::Trailing => ExprType::Rtrim, - } - } + Some(TrimWhereField::Both) => ExprType::Trim, + Some(TrimWhereField::Leading) => ExprType::Ltrim, + Some(TrimWhereField::Trailing) => ExprType::Rtrim, None => ExprType::Trim, }; + if let Some(t) = trim_what { + inputs.push(self.bind_expr(*t)?); + } Ok(FunctionCall::new(func_type, inputs)?.into()) } diff --git a/src/sqlparser/src/ast/mod.rs b/src/sqlparser/src/ast/mod.rs index 79af8fd138e33..bcc0576f2bdd3 100644 --- a/src/sqlparser/src/ast/mod.rs +++ b/src/sqlparser/src/ast/mod.rs @@ -345,13 +345,14 @@ pub enum Expr { start: Box, count: Option>, }, - /// TRIM([BOTH | LEADING | TRAILING] [FROM ])\ + /// TRIM([BOTH | LEADING | TRAILING] [] FROM )\ /// Or\ - /// TRIM() + /// TRIM([BOTH | LEADING | TRAILING] [FROM] [, ]) Trim { expr: Box, // ([BOTH | LEADING | TRAILING], ) - trim_where: Option<(TrimWhereField, Box)>, + trim_where: Option, + trim_what: Option>, }, /// `expr COLLATE collation` Collate { @@ -568,15 +569,19 @@ impl fmt::Display for Expr { } Expr::IsDistinctFrom(a, b) => write!(f, "{} IS DISTINCT FROM {}", a, b), Expr::IsNotDistinctFrom(a, b) => write!(f, "{} IS NOT DISTINCT FROM {}", a, b), - Expr::Trim { expr, trim_where } => { + Expr::Trim { + expr, + trim_where, + trim_what, + } => { write!(f, "TRIM(")?; - if let Some((ident, trim_char)) = trim_where { - write!(f, "{} {} FROM {}", ident, trim_char, expr)?; - } else { - write!(f, "{}", expr)?; + if let Some(ident) = trim_where { + write!(f, "{} ", ident)?; } - - write!(f, ")") + if let Some(trim_char) = trim_what { + write!(f, "{} ", trim_char)?; + } + write!(f, "FROM {})", expr) } Expr::Row(exprs) => write!( f, diff --git a/src/sqlparser/src/parser.rs b/src/sqlparser/src/parser.rs index ff784093fb0a3..e1aa820ebbb8d 100644 --- a/src/sqlparser/src/parser.rs +++ b/src/sqlparser/src/parser.rs @@ -955,28 +955,41 @@ impl Parser { }) } - /// TRIM (WHERE 'text' FROM 'text')\ - /// TRIM ('text') + /// TRIM ([WHERE] ['text'] FROM 'text')\ + /// TRIM ([WHERE] [FROM] 'text' [, 'text']) pub fn parse_trim_expr(&mut self) -> Result { self.expect_token(&Token::LParen)?; - let mut where_expr = None; + let mut trim_where = None; if let Token::Word(word) = self.peek_token().token { if [Keyword::BOTH, Keyword::LEADING, Keyword::TRAILING] .iter() .any(|d| word.keyword == *d) { - let trim_where = self.parse_trim_where()?; - let sub_expr = self.parse_expr()?; - self.expect_keyword(Keyword::FROM)?; - where_expr = Some((trim_where, Box::new(sub_expr))); + trim_where = Some(self.parse_trim_where()?); } } - let expr = self.parse_expr()?; + + let (mut trim_what, expr) = if self.parse_keyword(Keyword::FROM) { + (None, self.parse_expr()?) + } else { + let mut expr = self.parse_expr()?; + if self.parse_keyword(Keyword::FROM) { + let trim_what = std::mem::replace(&mut expr, self.parse_expr()?); + (Some(Box::new(trim_what)), expr) + } else { + (None, expr) + } + }; + + if trim_what.is_none() && self.consume_token(&Token::Comma) { + trim_what = Some(Box::new(self.parse_expr()?)); + } self.expect_token(&Token::RParen)?; Ok(Expr::Trim { expr: Box::new(expr), - trim_where: where_expr, + trim_where, + trim_what, }) } diff --git a/src/sqlparser/tests/sqlparser_common.rs b/src/sqlparser/tests/sqlparser_common.rs index 361940cc3d0b0..c5184e58ccdfb 100644 --- a/src/sqlparser/tests/sqlparser_common.rs +++ b/src/sqlparser/tests/sqlparser_common.rs @@ -2865,7 +2865,7 @@ fn parse_trim() { "SELECT TRIM(TRAILING 'xyz' FROM 'xyzfooxyz')", ); - one_statement_parses_to("SELECT TRIM(' foo ')", "SELECT TRIM(' foo ')"); + one_statement_parses_to("SELECT TRIM(' foo ')", "SELECT TRIM(FROM ' foo ')"); let res = parse_sql_statements("SELECT TRIM(FOO 'xyz' FROM 'xyzfooxyz')"); diff --git a/src/tests/regress/data/sql/strings.sql b/src/tests/regress/data/sql/strings.sql index b8316ea49698a..fe7f3c8561486 100644 --- a/src/tests/regress/data/sql/strings.sql +++ b/src/tests/regress/data/sql/strings.sql @@ -118,11 +118,11 @@ SELECT E'De\\678dBeEf'::bytea; -- -- E021-09 trim function ---@ SELECT TRIM(BOTH FROM ' bunch o blanks ') = 'bunch o blanks' AS "bunch o blanks"; ---@ ---@ SELECT TRIM(LEADING FROM ' bunch o blanks ') = 'bunch o blanks ' AS "bunch o blanks "; ---@ ---@ SELECT TRIM(TRAILING FROM ' bunch o blanks ') = ' bunch o blanks' AS " bunch o blanks"; +SELECT TRIM(BOTH FROM ' bunch o blanks ') = 'bunch o blanks' AS "bunch o blanks"; + +SELECT TRIM(LEADING FROM ' bunch o blanks ') = 'bunch o blanks ' AS "bunch o blanks "; + +SELECT TRIM(TRAILING FROM ' bunch o blanks ') = ' bunch o blanks' AS " bunch o blanks"; SELECT TRIM(BOTH 'x' FROM 'xxxxxsome Xsxxxxx') = 'some Xs' AS "some Xs"; diff --git a/src/tests/sqlsmith/src/sql_gen/expr.rs b/src/tests/sqlsmith/src/sql_gen/expr.rs index 1447f6ea2ced5..ac288a0dd4723 100644 --- a/src/tests/sqlsmith/src/sql_gen/expr.rs +++ b/src/tests/sqlsmith/src/sql_gen/expr.rs @@ -579,14 +579,15 @@ fn make_trim(func: ExprType, exprs: Vec) -> Expr { E::Rtrim => TrimWhereField::Trailing, _ => unreachable!(), }; - let trim_where = if exprs.len() > 1 { - Some((trim_type, Box::new(exprs[1].clone()))) + let trim_what = if exprs.len() > 1 { + Some(Box::new(exprs[1].clone())) } else { None }; Expr::Trim { expr: Box::new(exprs[0].clone()), - trim_where, + trim_where: Some(trim_type), + trim_what, } } From 9e0aa9d7ecd389b7c5c93c4b4b606a30b45d1119 Mon Sep 17 00:00:00 2001 From: Xiangjin Date: Tue, 4 Apr 2023 15:04:17 +0800 Subject: [PATCH 2/2] bind btrim --- e2e_test/batch/functions/trim.slt.part | 5 +++++ src/frontend/src/binder/expr/function.rs | 1 + 2 files changed, 6 insertions(+) diff --git a/e2e_test/batch/functions/trim.slt.part b/e2e_test/batch/functions/trim.slt.part index 0ad8e03c219b9..730d35f64868e 100644 --- a/e2e_test/batch/functions/trim.slt.part +++ b/e2e_test/batch/functions/trim.slt.part @@ -67,3 +67,8 @@ query T select rtrim('abcxyzabc', 'bca'); ---- abcxyz + +query T +select btrim('abcxyzabc', 'bca'); +---- +xyz diff --git a/src/frontend/src/binder/expr/function.rs b/src/frontend/src/binder/expr/function.rs index 63dd63d6d596a..137d5c10ded41 100644 --- a/src/frontend/src/binder/expr/function.rs +++ b/src/frontend/src/binder/expr/function.rs @@ -372,6 +372,7 @@ impl Binder { ("replace", raw_call(ExprType::Replace)), ("overlay", raw_call(ExprType::Overlay)), ("position", raw_call(ExprType::Position)), + ("btrim", raw_call(ExprType::Trim)), ("ltrim", raw_call(ExprType::Ltrim)), ("rtrim", raw_call(ExprType::Rtrim)), ("md5", raw_call(ExprType::Md5)),