Skip to content

Commit

Permalink
refactor parser codegen (#497)
Browse files Browse the repository at this point in the history
- move parsing tokens to a common function
- remove the need to pass token names as static strings everywhere,
since the kinds already derive `strum::AsRefStr`

This decreases nesting, improves readability of the parser code, and
reduces the size of the generated code by roughly 30%.
But most importantly, it unblocks #498 and #500 by making it easier to
generate named or unnamed nodes at each parser root.
  • Loading branch information
OmarTawfik authored Jun 16, 2023
1 parent 5363d54 commit 40c3daf
Show file tree
Hide file tree
Showing 15 changed files with 4,885 additions and 20,473 deletions.
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

119 changes: 93 additions & 26 deletions crates/codegen/syntax/src/code_generator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -269,15 +269,103 @@ impl CodeGenerator {
let token_kind = format_ident!("{name}");
if scanner.is_defined_for_all_versions() {
let function_name = format_ident!("scan_{name}", name = name.to_snake_case());
quote!{ ProductionKind::#production_kind => call_scanner(self, input, Language::#function_name, TokenKind::#token_kind, #name) }
quote!{ ProductionKind::#production_kind => call_scanner(self, input, Language::#function_name, TokenKind::#token_kind) }
} else {
let function_name = format_ident!("maybe_scan_{name}", name = name.to_snake_case());
quote!{ ProductionKind::#production_kind => try_call_scanner(self, input, Language::#function_name, TokenKind::#token_kind, #name) }
quote!{ ProductionKind::#production_kind => try_call_scanner(self, input, Language::#function_name, TokenKind::#token_kind) }
}
});
quote! { #(#invocations),* }
}

/// Builds the token stream for the two shared token-parsing helpers,
/// `parse_token_with_trivia` and `parse_token`.
///
/// Both generated helpers take the input `stream`, a `scanner` callback
/// (`Fn(&Self, &mut Stream) -> bool`) and the `TokenKind` to produce, and
/// return a `ParserResult`:
///
/// * `Pass` wraps a `cst::Node::token` covering the range the scanner consumed.
/// * `Fail` carries a `ParseError` created from the position where scanning
///   began and `kind.as_ref()`.
///
/// The only difference between the two is that `parse_token_with_trivia` also
/// tries to attach optional leading and trailing trivia to the token node,
/// while `parse_token` always passes `None` for both.
pub fn token_functions(&self) -> TokenStream {
return quote! {
// Parses one token of `kind`, together with any trivia around it.
#[inline]
fn parse_token_with_trivia<F>(
&self,
stream: &mut Stream,
scanner: F,
kind: TokenKind,
) -> ParserResult
where
F: Fn(&Self, &mut Stream) -> bool,
{
// Leading trivia is optional: when parsing it fails, restore the
// saved position and continue without a leading trivia node.
let leading_trivia = {
let save = stream.position();
match self.parse_leading_trivia(stream) {
Fail { .. } => {
stream.set_position(save);
None
}
Pass { node, .. } => Some(node),
}
};

// The token range starts after the leading trivia.
let start = stream.position();

// `kind.as_ref()` is presumably supplied by the `strum_macros::AsRefStr`
// derive on `TokenKind` - TODO confirm.
// NOTE(review): this branch does not rewind the stream itself, so any
// leading trivia consumed above stays consumed; confirm callers (or the
// scanner) restore the position on failure.
if !scanner(self, stream) {
return Fail {
error: ParseError::new(start, kind.as_ref()),
};
}

// The token range ends where the scanner stopped.
let end = stream.position();

// Trailing trivia is optional too, handled the same way as leading trivia.
let trailing_trivia = {
let save = stream.position();
match self.parse_trailing_trivia(stream) {
Fail { .. } => {
stream.set_position(save);
None
}
Pass { node, .. } => Some(node),
}
};

return Pass {
node: cst::Node::token(
kind,
Range { start, end },
leading_trivia,
trailing_trivia,
),
error: None,
};
}

// Parses one token of `kind` without looking for surrounding trivia.
#[inline]
fn parse_token<F>(
&self,
stream: &mut Stream,
scanner: F,
kind: TokenKind,
) -> ParserResult
where
F: Fn(&Self, &mut Stream) -> bool,
{
let start = stream.position();

// Same failure shape as `parse_token_with_trivia`: the error records
// the start position and the string form of `kind`.
if !scanner(self, stream) {
return Fail {
error: ParseError::new(start, kind.as_ref()),
};
}

let end = stream.position();

// Both trivia slots of the token node are left empty.
return Pass {
node: cst::Node::token(
kind,
Range { start, end },
None,
None,
),
error: None,
};
}
};
}

pub fn parser_functions(&self) -> String {
let functions = self
.parsers
Expand Down Expand Up @@ -427,33 +515,12 @@ impl CodeGenerator {
{scanning_macros}
impl Language {{
{trivia_functions}
{token_functions}
{parser_functions}
}}
",
trivia_functions = quote! {
fn optional_leading_trivia(&self, stream: &mut Stream) -> Option<Rc<cst::Node>> {
let save = stream.position();
match self.parse_leading_trivia(stream) {
Fail{ .. } => {
stream.set_position(save);
None
},
Pass{ node, .. } => Some(node),
}
}
fn optional_trailing_trivia(&self, stream: &mut Stream) -> Option<Rc<cst::Node>> {
let save = stream.position();
match self.parse_trailing_trivia(stream) {
Fail{ .. } => {
stream.set_position(save);
None
},
Pass{ node, .. } => Some(node),
}
}
},
parser_functions = self.parser_functions()
token_functions = self.token_functions(),
parser_functions = self.parser_functions(),
);

codegen
Expand Down
43 changes: 39 additions & 4 deletions crates/codegen/syntax/src/rust_lib_code_generator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -132,15 +132,50 @@ impl CodeGenerator {
let production_kinds = self.production_kinds();
quote! {
use serde::Serialize;
use strum_macros::*;

#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize)]
#[derive(
Clone,
Copy,
Debug,
PartialEq,
Eq,
PartialOrd,
Ord,
Serialize,
strum_macros::EnumString,
strum_macros::AsRefStr,
strum_macros::Display,
)]
#token_kinds

#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize)]
#[derive(
Clone,
Copy,
Debug,
PartialEq,
Eq,
PartialOrd,
Ord,
Serialize,
strum_macros::EnumString,
strum_macros::AsRefStr,
strum_macros::Display,
)]
#rule_kinds

#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize, EnumString, AsRefStr, Display)]
#[derive(
Clone,
Copy,
Debug,
PartialEq,
Eq,
PartialOrd,
Ord,
Serialize,
strum_macros::EnumString,
strum_macros::AsRefStr,
strum_macros::Display,
)]
#production_kinds
}
};
Expand Down
35 changes: 7 additions & 28 deletions crates/codegen/syntax/src/to_parser_code.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,8 @@ impl<'context> CombinatorNode<'context> {
Production::Scanner { name, .. } => {
let kind = format_ident!("{name}");
let function_name = format_ident!("scan_{name}", name = name.to_snake_case());
let scanner = quote! { self.#function_name(stream) };
let error_message = name;
scanner_code_to_parser_code(scanner, kind, &error_message, !is_trivia)
let scanner = quote! { Self::#function_name };
scanner_code_to_parser_code(scanner, kind, !is_trivia)
}
Production::TriviaParser { name, .. } => {
let function_name = format_ident!("parse_{name}", name = name.to_snake_case());
Expand Down Expand Up @@ -588,45 +587,25 @@ fn scanner_production_to_parser_code(
if let Production::Scanner { name, .. } = open.production.as_ref() {
let kind = format_ident!("{name}");
let function_name = format_ident!("scan_{name}", name = name.to_snake_case());
let scanner = quote! { self.#function_name(stream) };
let error_message = name;
scanner_code_to_parser_code(scanner, kind, &error_message, !is_trivia)
let scanner = quote! { Self::#function_name };
scanner_code_to_parser_code(scanner, kind, !is_trivia)
} else {
unreachable!("This reference should be to a scanner")
}
}

fn scanner_code_to_parser_code(
scanner_code: TokenStream,
scanner: TokenStream,
kind: Ident,
error_message: &str,
with_trivia: bool,
) -> TokenStream {
if with_trivia {
quote! {
{
let leading_trivia = self.optional_leading_trivia(stream);
let start = stream.position();
if #scanner_code {
let end = stream.position();
let trailing_trivia = self.optional_trailing_trivia(stream);
Pass{ node: cst::Node::token(TokenKind::#kind, Range { start, end }, leading_trivia, trailing_trivia), error: None }
} else {
Fail{ error: ParseError::new(start, #error_message) }
}
}
self.parse_token_with_trivia(stream, #scanner, TokenKind::#kind)
}
} else {
quote! {
{
let start = stream.position();
if #scanner_code {
let end = stream.position();
Pass{ node: cst::Node::token(TokenKind::#kind, Range { start, end }, None, None), error: None }
} else {
Fail{ error: ParseError::new(start, #error_message) }
}
}
self.parse_token(stream, #scanner, TokenKind::#kind)
}
}
}
30 changes: 27 additions & 3 deletions crates/codegen/syntax/src/typescript_lib_code_generator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -152,15 +152,39 @@ impl CodeGenerator {
use napi_derive::napi;

#[napi]
#[derive(Debug, PartialEq, Eq, Serialize)]
#[derive(
Debug,
PartialEq,
Eq,
Serialize,
strum_macros::EnumString,
strum_macros::AsRefStr,
strum_macros::Display,
)]
#token_kinds

#[napi]
#[derive(Debug, PartialEq, Eq, Serialize)]
#[derive(
Debug,
PartialEq,
Eq,
Serialize,
strum_macros::EnumString,
strum_macros::AsRefStr,
strum_macros::Display,
)]
#rule_kinds

#[napi]
#[derive(Debug, PartialEq, Eq, Serialize)]
#[derive(
Debug,
PartialEq,
Eq,
Serialize,
strum_macros::EnumString,
strum_macros::AsRefStr,
strum_macros::Display,
)]
#production_kinds
}
};
Expand Down
41 changes: 37 additions & 4 deletions crates/codegen/syntax_templates/src/rust/kinds.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,35 @@
use serde::Serialize;
use strum_macros::*;

#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize)]
#[derive(
Clone,
Copy,
Debug,
PartialEq,
Eq,
PartialOrd,
Ord,
Serialize,
strum_macros::EnumString,
strum_macros::AsRefStr,
strum_macros::Display,
)]
pub enum TokenKind {
XXX,
}

#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize)]
#[derive(
Clone,
Copy,
Debug,
PartialEq,
Eq,
PartialOrd,
Ord,
Serialize,
strum_macros::EnumString,
strum_macros::AsRefStr,
strum_macros::Display,
)]
pub enum RuleKind {
_SEQUENCE,
_DELIMITEDBY,
Expand All @@ -15,7 +38,17 @@ pub enum RuleKind {
}

#[derive(
Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize, EnumString, AsRefStr, Display,
Clone,
Copy,
Debug,
PartialEq,
Eq,
PartialOrd,
Ord,
Serialize,
strum_macros::EnumString,
strum_macros::AsRefStr,
strum_macros::Display,
)]
pub enum ProductionKind {
XXX,
Expand Down
Loading

0 comments on commit 40c3daf

Please sign in to comment.