
Commit

Add APIs to reuse token buffers in Tokenizer (#1094)
0rphon authored Jan 22, 2024
1 parent b0b6288 commit d72f0a9
Showing 3 changed files with 32 additions and 4 deletions.
5 changes: 5 additions & 0 deletions src/parser/mod.rs
@@ -8711,6 +8711,11 @@ impl<'a> Parser<'a> {
         self.expect_token(&Token::RParen)?;
         Ok(partitions)
     }
+
+    /// Consume the parser and return its underlying token buffer
+    pub fn into_tokens(self) -> Vec<TokenWithLocation> {
+        self.tokens
+    }
 }
 
 impl Word {
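As a usage note for the `into_tokens` API added above, here is a minimal sketch (not part of this commit; the query string is illustrative) of consuming the parser to take back ownership of its token buffer once parsing is done:

use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;
use sqlparser::tokenizer::TokenWithLocation;

fn main() {
    let dialect = GenericDialect {};
    let mut parser = Parser::new(&dialect)
        .try_with_sql("SELECT 1")
        .unwrap();
    parser.parse_statements().unwrap();

    // Consume the parser and recover the Vec<TokenWithLocation> it owned,
    // so its allocation can be cleared and reused for another statement.
    let tokens: Vec<TokenWithLocation> = parser.into_tokens();
    assert!(!tokens.is_empty());
}

Because `into_tokens` takes `self` by value, the buffer can only be recovered after the caller is finished with the parser.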
17 changes: 13 additions & 4 deletions src/tokenizer.rs
@@ -543,21 +543,30 @@ impl<'a> Tokenizer<'a> {
 
     /// Tokenize the statement and produce a vector of tokens with location information
     pub fn tokenize_with_location(&mut self) -> Result<Vec<TokenWithLocation>, TokenizerError> {
+        let mut tokens: Vec<TokenWithLocation> = vec![];
+        self.tokenize_with_location_into_buf(&mut tokens)
+            .map(|_| tokens)
+    }
+
+    /// Tokenize the statement and append tokens with location information into the provided buffer.
+    /// If an error is thrown, the buffer will contain all tokens that were successfully parsed before the error.
+    pub fn tokenize_with_location_into_buf(
+        &mut self,
+        buf: &mut Vec<TokenWithLocation>,
+    ) -> Result<(), TokenizerError> {
         let mut state = State {
             peekable: self.query.chars().peekable(),
             line: 1,
             col: 1,
         };
 
-        let mut tokens: Vec<TokenWithLocation> = vec![];
-
         let mut location = state.location();
         while let Some(token) = self.next_token(&mut state)? {
-            tokens.push(TokenWithLocation { token, location });
+            buf.push(TokenWithLocation { token, location });
 
             location = state.location();
         }
-        Ok(tokens)
+        Ok(())
     }
 
     // Tokenize the identifer or keywords in `ch`
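The second doc comment above calls out the error behavior of the new method. A hedged sketch of what that means for callers (not part of this commit; the unterminated string literal is just an illustrative way to force a tokenizer error):

use sqlparser::dialect::GenericDialect;
use sqlparser::tokenizer::{TokenWithLocation, Tokenizer};

fn main() {
    let dialect = GenericDialect {};
    let mut buf: Vec<TokenWithLocation> = Vec::new();

    // The unterminated string literal makes tokenization fail part-way through.
    let result = Tokenizer::new(&dialect, "SELECT 'unterminated")
        .tokenize_with_location_into_buf(&mut buf);

    assert!(result.is_err());
    // The tokens produced before the failure (e.g. the SELECT keyword) are
    // still in the caller's buffer and can be inspected or cleared before
    // the next attempt.
    assert!(!buf.is_empty());
}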
14 changes: 14 additions & 0 deletions tests/sqlparser_common.rs
@@ -31,6 +31,7 @@ use sqlparser::dialect::{
 };
 use sqlparser::keywords::ALL_KEYWORDS;
 use sqlparser::parser::{Parser, ParserError, ParserOptions};
+use sqlparser::tokenizer::Tokenizer;
 use test_utils::{
     all_dialects, alter_table_op, assert_eq_vec, expr_from_projection, join, number, only, table,
     table_alias, TestedDialects,
@@ -8080,3 +8081,16 @@ fn test_release_savepoint() {
 
     one_statement_parses_to("RELEASE test1", "RELEASE SAVEPOINT test1");
 }
+
+#[test]
+fn test_buffer_reuse() {
+    let d = GenericDialect {};
+    let q = "INSERT INTO customer WITH foo AS (SELECT 1) SELECT * FROM foo UNION VALUES (1)";
+    let mut buf = Vec::new();
+    Tokenizer::new(&d, q)
+        .tokenize_with_location_into_buf(&mut buf)
+        .unwrap();
+    let mut p = Parser::new(&d).with_tokens_with_locations(buf);
+    p.parse_statements().unwrap();
+    let _ = p.into_tokens();
+}
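Putting the two new APIs together, here is a minimal sketch (not part of this commit; the statements are illustrative) of the reuse loop the commit enables, cycling a single token buffer through the tokenizer and parser:

use sqlparser::dialect::GenericDialect;
use sqlparser::parser::Parser;
use sqlparser::tokenizer::Tokenizer;

fn main() {
    let dialect = GenericDialect {};
    let mut buf = Vec::new();

    for sql in ["SELECT 1", "SELECT 2", "SELECT 3"] {
        buf.clear(); // keep the capacity, drop the previous statement's tokens
        Tokenizer::new(&dialect, sql)
            .tokenize_with_location_into_buf(&mut buf)
            .unwrap();

        let mut parser = Parser::new(&dialect).with_tokens_with_locations(buf);
        parser.parse_statements().unwrap();

        // Take the buffer back so the next iteration can reuse its allocation.
        buf = parser.into_tokens();
    }
}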