From d17e5a253f5ee16b8d4b8933d281cea721a012ec Mon Sep 17 00:00:00 2001 From: DvvCz <86097860+DvvCz@users.noreply.github.com> Date: Tue, 13 Dec 2022 12:19:38 -0800 Subject: [PATCH 1/5] Allow use of rust keywords as rule names This prefixes all non-builtin rules with r# to allow for use of rust keywords as pest rules. This refactors code to use format_ident! rather than Ident::new (with format! internally in some cases), as it does the same thing internally. Span is defined to fallback to Span::call_site() in case of being given a non-ident, so there shouldn't be any issues. Updated generate_complete test case and removed the rust keyword restriction from the validator. --- generator/src/generator.rs | 33 ++++++++++++++++++++++--------- meta/src/validator.rs | 40 +------------------------------------- 2 files changed, 25 insertions(+), 48 deletions(-) diff --git a/generator/src/generator.rs b/generator/src/generator.rs index f3da1bac..150d15ea 100644 --- a/generator/src/generator.rs +++ b/generator/src/generator.rs @@ -169,7 +169,7 @@ fn generate_builtin_rules() -> Vec<(&'static str, TokenStream)> { // Needed because Cargo doesn't watch for changes in grammars. fn generate_include(name: &Ident, path: &str) -> TokenStream { - let const_name = Ident::new(&format!("_PEST_GRAMMAR_{}", name), Span::call_site()); + let const_name = format_ident!("_PEST_GRAMMAR_{}", name); // Need to make this relative to the current directory since the path to the file // is derived from the CARGO_MANIFEST_DIR environment variable let mut current_dir = std::env::current_dir().expect("Unable to get current directory"); @@ -184,7 +184,7 @@ fn generate_include(name: &Ident, path: &str) -> TokenStream { fn generate_enum(rules: &[OptimizedRule], uses_eoi: bool) -> TokenStream { let rules = rules .iter() - .map(|rule| Ident::new(rule.name.as_str(), Span::call_site())); + .map(|rule| format_ident!("r#{}", rule.name.as_str())); if uses_eoi { quote! { #[allow(dead_code, non_camel_case_types, clippy::upper_case_acronyms)] @@ -209,7 +209,7 @@ fn generate_patterns(rules: &[OptimizedRule], uses_eoi: bool) -> TokenStream { let mut rules: Vec = rules .iter() .map(|rule| { - let rule = Ident::new(rule.name.as_str(), Span::call_site()); + let rule = format_ident!("r#{}", Ident::new(rule.name.as_str(), Span::call_site())); quote! { Rule::#rule => rules::#rule(state) } @@ -228,7 +228,8 @@ fn generate_patterns(rules: &[OptimizedRule], uses_eoi: bool) -> TokenStream { } fn generate_rule(rule: OptimizedRule) -> TokenStream { - let name = Ident::new(&rule.name, Span::call_site()); + let name = format_ident!("r#{}", Ident::new(&rule.name, Span::call_site())); + let expr = if rule.ty == RuleType::Atomic || rule.ty == RuleType::CompoundAtomic { generate_expr_atomic(rule.expr) } else if name == "WHITESPACE" || name == "COMMENT" { @@ -364,7 +365,7 @@ fn generate_expr(expr: OptimizedExpr) -> TokenStream { } } OptimizedExpr::Ident(ident) => { - let ident = Ident::new(&ident, Span::call_site()); + let ident = format_ident!("r#{}", ident); quote! { self::#ident(state) } } OptimizedExpr::PeekSlice(start, end_) => { @@ -510,7 +511,7 @@ fn generate_expr_atomic(expr: OptimizedExpr) -> TokenStream { } } OptimizedExpr::Ident(ident) => { - let ident = Ident::new(&ident, Span::call_site()); + let ident = format_ident!("r#{}", ident); quote! { self::#ident(state) } } OptimizedExpr::PeekSlice(start, end_) => { @@ -960,11 +961,17 @@ mod tests { fn generate_complete() { let name = Ident::new("MyParser", Span::call_site()); let generics = Generics::default(); + let rules = vec![OptimizedRule { name: "a".to_owned(), ty: RuleType::Silent, expr: OptimizedExpr::Str("b".to_owned()), + }, OptimizedRule { + name: "if".to_owned(), + ty: RuleType::Silent, + expr: OptimizedExpr::Str("c".to_owned()) }]; + let defaults = vec!["ANY"]; let result = result_type(); let box_ty = box_type(); @@ -980,7 +987,8 @@ mod tests { #[allow(dead_code, non_camel_case_types, clippy::upper_case_acronyms)] #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] pub enum Rule { - a + r#a, + r#if } #[allow(clippy::all)] @@ -1009,10 +1017,16 @@ mod tests { #[inline] #[allow(non_snake_case, unused_variables)] - pub fn a(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { + pub fn r#a(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { state.match_string("b") } + #[inline] + #[allow(non_snake_case, unused_variables)] + pub fn r#if(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { + state.match_string("c") + } + #[inline] #[allow(dead_code, non_snake_case, unused_variables)] pub fn ANY(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { @@ -1025,7 +1039,8 @@ mod tests { ::pest::state(input, |state| { match rule { - Rule::a => rules::a(state) + Rule::r#a => rules::r#a(state), + Rule::r#if => rules::r#if(state) } }) } diff --git a/meta/src/validator.rs b/meta/src/validator.rs index 04eeeb2f..96f1038e 100644 --- a/meta/src/validator.rs +++ b/meta/src/validator.rs @@ -71,7 +71,6 @@ static BUILTINS: Lazy> = Lazy::new(|| { }); /// It checks the parsed grammar for common mistakes: -/// - using Rust keywords /// - using Pest keywords /// - duplicate rules /// - undefined rules @@ -84,6 +83,7 @@ pub fn validate_pairs(pairs: Pairs<'_, Rule>) -> Result, Vec = pairs .clone() .filter(|pair| pair.as_rule() == Rule::grammar_rule) @@ -98,7 +98,6 @@ pub fn validate_pairs(pairs: Pairs<'_, Rule>) -> Result, Vec) -> Result, Vec>) -> Vec> { - let mut errors = vec![]; - - for definition in definitions { - let name = definition.as_str(); - - if RUST_KEYWORDS.contains(name) { - errors.push(Error::new_from_span( - ErrorVariant::CustomError { - message: format!("{} is a rust keyword", name), - }, - *definition, - )) - } - } - - errors -} - /// Validates that the given `definitions` do not contain any Pest keywords. #[allow(clippy::ptr_arg)] pub fn validate_pest_keywords(definitions: &Vec>) -> Vec> { @@ -507,22 +485,6 @@ mod tests { #[test] #[should_panic(expected = "grammar error - --> 1:1 - | -1 | let = { \"a\" } - | ^-^ - | - = let is a rust keyword")] - fn rust_keyword() { - let input = "let = { \"a\" }"; - unwrap_or_report(validate_pairs( - PestParser::parse(Rule::grammar_rules, input).unwrap(), - )); - } - - #[test] - #[should_panic(expected = "grammar error - --> 1:1 | 1 | ANY = { \"a\" } From c8d05c10be3177941364139c240f2bfbc73b0992 Mon Sep 17 00:00:00 2001 From: DvvCz <86097860+DvvCz@users.noreply.github.com> Date: Tue, 13 Dec 2022 12:41:57 -0800 Subject: [PATCH 2/5] Test fixes and missed simplifications * Updated tests that use identifiers to escape them with `r#` * Changed the previously added `generate_complete` `r#if` test to ensure using an identifier as an expression is properly emitted * Removed some redundant Ident::new() inside of `format_ident!` calls --- generator/src/generator.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/generator/src/generator.rs b/generator/src/generator.rs index 150d15ea..e79e56dd 100644 --- a/generator/src/generator.rs +++ b/generator/src/generator.rs @@ -184,7 +184,7 @@ fn generate_include(name: &Ident, path: &str) -> TokenStream { fn generate_enum(rules: &[OptimizedRule], uses_eoi: bool) -> TokenStream { let rules = rules .iter() - .map(|rule| format_ident!("r#{}", rule.name.as_str())); + .map(|rule| format_ident!("r#{}", rule.name)); if uses_eoi { quote! { #[allow(dead_code, non_camel_case_types, clippy::upper_case_acronyms)] @@ -209,7 +209,7 @@ fn generate_patterns(rules: &[OptimizedRule], uses_eoi: bool) -> TokenStream { let mut rules: Vec = rules .iter() .map(|rule| { - let rule = format_ident!("r#{}", Ident::new(rule.name.as_str(), Span::call_site())); + let rule = format_ident!("r#{}", rule.name); quote! { Rule::#rule => rules::#rule(state) } @@ -228,7 +228,7 @@ fn generate_patterns(rules: &[OptimizedRule], uses_eoi: bool) -> TokenStream { } fn generate_rule(rule: OptimizedRule) -> TokenStream { - let name = format_ident!("r#{}", Ident::new(&rule.name, Span::call_site())); + let name = format_ident!("r#{}", rule.name); let expr = if rule.ty == RuleType::Atomic || rule.ty == RuleType::CompoundAtomic { generate_expr_atomic(rule.expr) @@ -676,7 +676,7 @@ mod tests { #[allow(dead_code, non_camel_case_types, clippy::upper_case_acronyms)] #[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] pub enum Rule { - f + r#f } } .to_string() @@ -864,7 +864,7 @@ mod tests { assert_eq!( generate_expr(expr).to_string(), quote! { - self::a(state).or_else(|state| { + self::r#a(state).or_else(|state| { state.sequence(|state| { state.match_range('a'..'b').and_then(|state| { super::hidden::skip(state) @@ -930,7 +930,7 @@ mod tests { assert_eq!( generate_expr_atomic(expr).to_string(), quote! { - self::a(state).or_else(|state| { + self::r#a(state).or_else(|state| { state.sequence(|state| { state.match_range('a'..'b').and_then(|state| { state.lookahead(false, |state| { @@ -969,7 +969,7 @@ mod tests { }, OptimizedRule { name: "if".to_owned(), ty: RuleType::Silent, - expr: OptimizedExpr::Str("c".to_owned()) + expr: OptimizedExpr::Ident("a".to_owned()) }]; let defaults = vec!["ANY"]; @@ -1024,7 +1024,7 @@ mod tests { #[inline] #[allow(non_snake_case, unused_variables)] pub fn r#if(state: #box_ty<::pest::ParserState<'_, Rule>>) -> ::pest::ParseResult<#box_ty<::pest::ParserState<'_, Rule>>> { - state.match_string("c") + self::r#a(state) } #[inline] From 1553429388f7a59ec0ed311b9dffa120d64d9b2a Mon Sep 17 00:00:00 2001 From: DvvCz <86097860+DvvCz@users.noreply.github.com> Date: Tue, 13 Dec 2022 15:20:12 -0800 Subject: [PATCH 3/5] `fmt` & Revert removal of `validate_rust_keywords` --- generator/src/generator.rs | 25 +++++++++++++------------ meta/src/validator.rs | 22 ++++++++++++++++++++++ 2 files changed, 35 insertions(+), 12 deletions(-) diff --git a/generator/src/generator.rs b/generator/src/generator.rs index e79e56dd..b4b16908 100644 --- a/generator/src/generator.rs +++ b/generator/src/generator.rs @@ -182,9 +182,7 @@ fn generate_include(name: &Ident, path: &str) -> TokenStream { } fn generate_enum(rules: &[OptimizedRule], uses_eoi: bool) -> TokenStream { - let rules = rules - .iter() - .map(|rule| format_ident!("r#{}", rule.name)); + let rules = rules.iter().map(|rule| format_ident!("r#{}", rule.name)); if uses_eoi { quote! { #[allow(dead_code, non_camel_case_types, clippy::upper_case_acronyms)] @@ -962,15 +960,18 @@ mod tests { let name = Ident::new("MyParser", Span::call_site()); let generics = Generics::default(); - let rules = vec![OptimizedRule { - name: "a".to_owned(), - ty: RuleType::Silent, - expr: OptimizedExpr::Str("b".to_owned()), - }, OptimizedRule { - name: "if".to_owned(), - ty: RuleType::Silent, - expr: OptimizedExpr::Ident("a".to_owned()) - }]; + let rules = vec![ + OptimizedRule { + name: "a".to_owned(), + ty: RuleType::Silent, + expr: OptimizedExpr::Str("b".to_owned()), + }, + OptimizedRule { + name: "if".to_owned(), + ty: RuleType::Silent, + expr: OptimizedExpr::Ident("a".to_owned()), + }, + ]; let defaults = vec!["ANY"]; let result = result_type(); diff --git a/meta/src/validator.rs b/meta/src/validator.rs index 96f1038e..dc0555a2 100644 --- a/meta/src/validator.rs +++ b/meta/src/validator.rs @@ -114,6 +114,28 @@ pub fn validate_pairs(pairs: Pairs<'_, Rule>) -> Result, Vec>) -> Vec> { + let mut errors = vec![]; + + for definition in definitions { + let name = definition.as_str(); + + if RUST_KEYWORDS.contains(name) { + errors.push(Error::new_from_span( + ErrorVariant::CustomError { + message: format!("{} is a rust keyword", name), + }, + *definition, + )) + } + } + + errors +} + /// Validates that the given `definitions` do not contain any Pest keywords. #[allow(clippy::ptr_arg)] pub fn validate_pest_keywords(definitions: &Vec>) -> Vec> { From 13994832f5956d39cffe4bfbd46153fda084b27b Mon Sep 17 00:00:00 2001 From: DvvCz <86097860+DvvCz@users.noreply.github.com> Date: Tue, 13 Dec 2022 15:26:29 -0800 Subject: [PATCH 4/5] Remove unused `Span` import --- generator/src/generator.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/generator/src/generator.rs b/generator/src/generator.rs index b4b16908..2bfe9c13 100644 --- a/generator/src/generator.rs +++ b/generator/src/generator.rs @@ -9,7 +9,7 @@ use std::path::PathBuf; -use proc_macro2::{Span, TokenStream}; +use proc_macro2::TokenStream; use quote::{ToTokens, TokenStreamExt}; use syn::{self, Generics, Ident}; From dbbbe8a631465ca70ec79d6e458d5021b2ec75df Mon Sep 17 00:00:00 2001 From: DvvCz <86097860+DvvCz@users.noreply.github.com> Date: Tue, 13 Dec 2022 17:58:48 -0800 Subject: [PATCH 5/5] Fix whitespace/comment issue `generate_rule` was checking the name after formatting for whether it was `WHITESPACE` or `COMMENT`. Re-imported proc_macro2::Span for tests since they still use it. --- generator/src/generator.rs | 5 +++-- vm/src/lib.rs | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/generator/src/generator.rs b/generator/src/generator.rs index 2bfe9c13..9fd7a0e0 100644 --- a/generator/src/generator.rs +++ b/generator/src/generator.rs @@ -227,10 +227,9 @@ fn generate_patterns(rules: &[OptimizedRule], uses_eoi: bool) -> TokenStream { fn generate_rule(rule: OptimizedRule) -> TokenStream { let name = format_ident!("r#{}", rule.name); - let expr = if rule.ty == RuleType::Atomic || rule.ty == RuleType::CompoundAtomic { generate_expr_atomic(rule.expr) - } else if name == "WHITESPACE" || name == "COMMENT" { + } else if rule.name == "WHITESPACE" || rule.name == "COMMENT" { let atomic = generate_expr_atomic(rule.expr); quote! { @@ -658,6 +657,8 @@ fn option_type() -> TokenStream { #[cfg(test)] mod tests { + use proc_macro2::Span; + use super::*; #[test] diff --git a/vm/src/lib.rs b/vm/src/lib.rs index 7c9018a5..48391870 100644 --- a/vm/src/lib.rs +++ b/vm/src/lib.rs @@ -126,7 +126,7 @@ impl Vm { }; if let Some(rule) = self.rules.get(rule) { - if &rule.name == "WHITESPACE" || &rule.name == "COMMENT" { + if rule.name == "WHITESPACE" || rule.name == "COMMENT" { match rule.ty { RuleType::Normal => state.rule(&rule.name, |state| { state.atomic(Atomicity::Atomic, |state| {