Improve parsing speed by avoiding some clones in parse_identifier #1624

Merged

merged 2 commits on Dec 29, 2024
Changes from 1 commit
60 changes: 36 additions & 24 deletions src/parser/mod.rs
@@ -970,15 +970,15 @@ impl<'a> Parser<'a> {
t @ (Token::Word(_) | Token::SingleQuotedString(_)) => {
if self.peek_token().token == Token::Period {
let mut id_parts: Vec<Ident> = vec![match t {
Token::Word(w) => w.to_ident(next_token.span),
Token::Word(w) => w.into_ident(next_token.span),
Contributor Author:

`w.to_ident` cloned (copied) the string; `into_ident` simply reuses the existing string.
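
To illustrate the ownership difference behind this change: a borrowing conversion has to clone the `String` in order to hand out an owned `Ident`, while a consuming conversion can move it. A minimal standalone sketch of the pattern (toy `Word`/`Ident` types for illustration, not the actual sqlparser definitions; the `span` parameter is omitted):

```rust
struct Word {
    value: String,
}

struct Ident {
    value: String,
}

impl Word {
    // Borrowing conversion: `self.value` cannot be moved out of `&self`,
    // so building an owned `Ident` requires cloning the String.
    fn to_ident(&self) -> Ident {
        Ident { value: self.value.clone() }
    }

    // Consuming conversion: taking `self` by value lets the String be
    // moved into the `Ident` with no new allocation.
    fn into_ident(self) -> Ident {
        Ident { value: self.value }
    }
}

fn main() {
    let w = Word { value: "table_name".to_owned() };
    let cloned = w.to_ident();  // `w` is still usable: its String was cloned
    let moved = w.into_ident(); // `w` is consumed: its String is moved out
    println!("{} / {}", cloned.value, moved.value);
}
```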

Token::SingleQuotedString(s) => Ident::with_quote('\'', s),
_ => unreachable!(), // We matched above
}];

while self.consume_token(&Token::Period) {
let next_token = self.next_token();
match next_token.token {
Token::Word(w) => id_parts.push(w.to_ident(next_token.span)),
Token::Word(w) => id_parts.push(w.into_ident(next_token.span)),
Token::SingleQuotedString(s) => {
// SQLite has single-quoted identifiers
id_parts.push(Ident::with_quote('\'', s))
@@ -1108,7 +1108,7 @@ impl<'a> Parser<'a> {
if dialect_of!(self is PostgreSqlDialect | GenericDialect) =>
{
Ok(Some(Expr::Function(Function {
name: ObjectName(vec![w.to_ident(w_span)]),
name: ObjectName(vec![w.clone().into_ident(w_span)]),
Contributor Author:

In this case I couldn't figure out (yet) how to avoid the clone, given that `&w` is passed in.

This PR doesn't increase the number of clones performed, but it does make it more explicit where they happen.
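
A small sketch of why the clone is hard to avoid here (again with toy types rather than the real crate definitions): with only `&Word` available, the `String` cannot be moved out of the borrow, so the allocation that `to_ident` used to perform internally now shows up as an explicit `clone()` at the call site; the clone count is unchanged, it is just visible.

```rust
#[derive(Clone)]
struct Word {
    value: String,
}

struct Ident {
    value: String,
}

impl Word {
    fn into_ident(self) -> Ident {
        Ident { value: self.value }
    }
}

// Only a shared reference is available here, mirroring the `&w` passed into
// the parser helper, so consuming `*w` directly would not compile.
fn ident_from_borrowed(w: &Word) -> Ident {
    // `w.into_ident()` would be an error: cannot move out of `*w`, which is
    // behind a shared reference. The clone below is the same one `to_ident`
    // performed internally; it is just explicit at the call site now.
    w.clone().into_ident()
}

fn main() {
    let w = Word { value: "current_date".to_owned() };
    let ident = ident_from_borrowed(&w);
    println!("{} -> {}", w.value, ident.value);
}
```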

uses_odbc_syntax: false,
parameters: FunctionArguments::None,
args: FunctionArguments::None,
@@ -1123,7 +1123,7 @@ impl<'a> Parser<'a> {
| Keyword::CURRENT_DATE
| Keyword::LOCALTIME
| Keyword::LOCALTIMESTAMP => {
Ok(Some(self.parse_time_functions(ObjectName(vec![w.to_ident(w_span)]))?))
Ok(Some(self.parse_time_functions(ObjectName(vec![w.clone().into_ident(w_span)]))?))
}
Keyword::CASE => Ok(Some(self.parse_case_expr()?)),
Keyword::CONVERT => Ok(Some(self.parse_convert_expr(false)?)),
@@ -1148,7 +1148,7 @@ impl<'a> Parser<'a> {
Keyword::CEIL => Ok(Some(self.parse_ceil_floor_expr(true)?)),
Keyword::FLOOR => Ok(Some(self.parse_ceil_floor_expr(false)?)),
Keyword::POSITION if self.peek_token_ref().token == Token::LParen => {
Ok(Some(self.parse_position_expr(w.to_ident(w_span))?))
Ok(Some(self.parse_position_expr(w.clone().into_ident(w_span))?))
}
Keyword::SUBSTRING => Ok(Some(self.parse_substring_expr()?)),
Keyword::OVERLAY => Ok(Some(self.parse_overlay_expr()?)),
@@ -1167,7 +1167,7 @@ impl<'a> Parser<'a> {
let query = self.parse_query()?;
self.expect_token(&Token::RParen)?;
Ok(Some(Expr::Function(Function {
name: ObjectName(vec![w.to_ident(w_span)]),
name: ObjectName(vec![w.clone().into_ident(w_span)]),
uses_odbc_syntax: false,
parameters: FunctionArguments::None,
args: FunctionArguments::Subquery(query),
@@ -1203,11 +1203,12 @@ impl<'a> Parser<'a> {
w_span: Span,
) -> Result<Expr, ParserError> {
match self.peek_token().token {
Token::Period => {
self.parse_compound_field_access(Expr::Identifier(w.to_ident(w_span)), vec![])
}
Token::Period => self.parse_compound_field_access(
Expr::Identifier(w.clone().into_ident(w_span)),
vec![],
),
Token::LParen => {
let id_parts = vec![w.to_ident(w_span)];
let id_parts = vec![w.clone().into_ident(w_span)];
if let Some(expr) = self.parse_outer_join_expr(&id_parts) {
Ok(expr)
} else {
@@ -1220,7 +1221,7 @@ impl<'a> Parser<'a> {
}
Token::LBracket if dialect_of!(self is PostgreSqlDialect | DuckDbDialect | GenericDialect | ClickHouseDialect | BigQueryDialect) =>
{
let ident = Expr::Identifier(w.to_ident(w_span));
let ident = Expr::Identifier(w.clone().into_ident(w_span));
let mut fields = vec![];
self.parse_multi_dim_subscript(&mut fields)?;
self.parse_compound_field_access(ident, fields)
@@ -1250,11 +1251,11 @@ impl<'a> Parser<'a> {
Token::Arrow if self.dialect.supports_lambda_functions() => {
self.expect_token(&Token::Arrow)?;
Ok(Expr::Lambda(LambdaFunction {
params: OneOrManyWithParens::One(w.to_ident(w_span)),
params: OneOrManyWithParens::One(w.clone().into_ident(w_span)),
body: Box::new(self.parse_expr()?),
}))
}
_ => Ok(Expr::Identifier(w.to_ident(w_span))),
_ => Ok(Expr::Identifier(w.clone().into_ident(w_span))),
}
}

@@ -1438,7 +1439,7 @@ impl<'a> Parser<'a> {
} else {
let tok = self.next_token();
let key = match tok.token {
Token::Word(word) => word.to_ident(tok.span),
Token::Word(word) => word.into_ident(tok.span),
_ => {
return parser_err!(
format!("Expected identifier, found: {tok}"),
@@ -1490,7 +1491,7 @@ impl<'a> Parser<'a> {
let next_token = self.next_token();
match next_token.token {
Token::Word(w) => {
let expr = Expr::Identifier(w.to_ident(next_token.span));
let expr = Expr::Identifier(w.into_ident(next_token.span));
chain.push(AccessExpr::Dot(expr));
if self.peek_token().token == Token::LBracket {
if self.dialect.supports_partiql() {
@@ -1670,7 +1671,7 @@ impl<'a> Parser<'a> {
while p.consume_token(&Token::Period) {
let tok = p.next_token();
let name = match tok.token {
Token::Word(word) => word.to_ident(tok.span),
Token::Word(word) => word.into_ident(tok.span),
_ => return p.expected("identifier", tok),
};
let func = match p.parse_function(ObjectName(vec![name]))? {
@@ -8242,7 +8243,7 @@ impl<'a> Parser<'a> {
// This because snowflake allows numbers as placeholders
let next_token = self.next_token();
let ident = match next_token.token {
Token::Word(w) => Ok(w.to_ident(next_token.span)),
Token::Word(w) => Ok(w.into_ident(next_token.span)),
Token::Number(w, false) => Ok(Ident::new(w)),
_ => self.expected("placeholder", next_token),
}?;
@@ -8753,7 +8754,7 @@ impl<'a> Parser<'a> {
// (For example, in `FROM t1 JOIN` the `JOIN` will always be parsed as a keyword,
// not an alias.)
Token::Word(w) if after_as || !reserved_kwds.contains(&w.keyword) => {
Ok(Some(w.to_ident(next_token.span)))
Ok(Some(w.into_ident(next_token.span)))
}
// MSSQL supports single-quoted strings as aliases for columns
// We accept them as table aliases too, although MSSQL does not.
@@ -8920,7 +8921,7 @@ impl<'a> Parser<'a> {
loop {
match &self.peek_token_ref().token {
Token::Word(w) => {
idents.push(w.to_ident(self.peek_token_ref().span));
idents.push(w.clone().into_ident(self.peek_token_ref().span));
}
Token::EOF | Token::Eq => break,
_ => {}
@@ -8975,7 +8976,7 @@ impl<'a> Parser<'a> {
// expecting at least one word for identifier
let next_token = self.next_token();
match next_token.token {
Token::Word(w) => idents.push(w.to_ident(next_token.span)),
Token::Word(w) => idents.push(w.into_ident(next_token.span)),
Token::EOF => {
return Err(ParserError::ParserError(
"Empty input when parsing identifier".to_string(),
@@ -8995,7 +8996,7 @@ impl<'a> Parser<'a> {
Token::Period => {
let next_token = self.next_token();
match next_token.token {
Token::Word(w) => idents.push(w.to_ident(next_token.span)),
Token::Word(w) => idents.push(w.into_ident(next_token.span)),
Token::EOF => {
return Err(ParserError::ParserError(
"Trailing period in identifier".to_string(),
@@ -9024,7 +9025,7 @@ impl<'a> Parser<'a> {
pub fn parse_identifier(&mut self) -> Result<Ident, ParserError> {
let next_token = self.next_token();
match next_token.token {
Token::Word(w) => Ok(w.to_ident(next_token.span)),
Token::Word(w) => Ok(w.into_ident(next_token.span)),
Token::SingleQuotedString(s) => Ok(Ident::with_quote('\'', s)),
Token::DoubleQuotedString(s) => Ok(Ident::with_quote('\"', s)),
_ => self.expected("identifier", next_token),
@@ -9044,9 +9045,10 @@ impl<'a> Parser<'a> {
fn parse_unquoted_hyphenated_identifier(&mut self) -> Result<(Ident, bool), ParserError> {
match self.peek_token().token {
Token::Word(w) => {
let quote_style_is_none = w.quote_style.is_none();
let mut requires_whitespace = false;
let mut ident = w.to_ident(self.next_token().span);
if w.quote_style.is_none() {
let mut ident = w.into_ident(self.next_token().span);
if quote_style_is_none {
while matches!(self.peek_token_no_skip().token, Token::Minus) {
self.next_token();
ident.value.push('-');
@@ -13475,13 +13477,23 @@
}

impl Word {
#[deprecated(since = "0.55.0", note = "please use `into_ident` instead")]
pub fn to_ident(&self, span: Span) -> Ident {
Ident {
value: self.value.clone(),
Contributor Author:

Note: `to_ident` clones `self.value`.

quote_style: self.quote_style,
span,
}
}

/// Convert this word into an [`Ident`] identifier
pub fn into_ident(self, span: Span) -> Ident {
Ident {
value: self.value,
quote_style: self.quote_style,
span,
}
}
}

#[cfg(test)]