Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement support for contextual keywords #598

Closed
wants to merge 16 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/breezy-kiwis-judge.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@nomicfoundation/slang": minor
---

Correctly parse contextual keywords: `error`, `revert`, `global`
5 changes: 5 additions & 0 deletions .changeset/seven-rice-smoke.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@nomicfoundation/slang": minor
---

Support parsing the contextual `emit` keyword in 0.4.x versions
8 changes: 8 additions & 0 deletions crates/codegen/grammar/src/dsl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,14 @@ macro_rules! slang_scanner {
slang_location!()
)
};
// `contextual <literal> using <scanner>`: builds a `ContextualKeyword` node from the
// keyword literal `$x` and the scanner named by `$p` (expanded through a recursive
// `slang_scanner!` call and boxed), together with the value of `slang_location!()`.
( contextual $x:literal using $p:ident ) => {
$crate::ScannerDefinitionNode::ContextualKeyword(
$x,
Box::new(slang_scanner!($p)),
slang_location!()
)
};

( $x:ident ) => {
($x::instance(), slang_location!()).into()
};
Expand Down
2 changes: 2 additions & 0 deletions crates/codegen/grammar/src/scanner_definition.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ pub enum ScannerDefinitionNode {
NotFollowedBy(Box<Self>, Box<Self>, SourceLocation),
CharRange(char, char, SourceLocation),
Literal(String, SourceLocation),
ContextualKeyword(&'static str, Box<Self>, SourceLocation),
ScannerDefinition(ScannerDefinitionRef, SourceLocation),
}

Expand Down Expand Up @@ -65,6 +66,7 @@ impl Visitable for ScannerDefinitionNode {
Self::NoneOf(_, _)
| Self::CharRange(_, _, _)
| Self::Literal(_, _)
| Self::ContextualKeyword(_, _, _)
| Self::ScannerDefinition(_, _) => {}
}
visitor.scanner_definition_node_leave(self);
Expand Down
91 changes: 46 additions & 45 deletions crates/codegen/parser/generator/src/code_generator.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use std::{
collections::{BTreeMap, BTreeSet},
mem,
path::PathBuf,
rc::Rc,
};
Expand All @@ -18,9 +17,10 @@ use codegen_grammar::{
};

use super::{
parser_definition::ParserDefinitionExtensions,
parser_definition::{ParserDefinitionExtensions, VersionQualityRangeVecExtensions},
precedence_parser_definition::PrecedenceParserDefinitionExtensions,
scanner_definition::ScannerDefinitionExtensions, trie::Trie,
scanner_definition::ScannerDefinitionExtensions,
trie::Trie,
};

#[derive(Default, Serialize)]
Expand All @@ -32,23 +32,25 @@ pub struct CodeGenerator {
production_kinds: BTreeSet<&'static str>,
trivia_kinds: BTreeSet<&'static str>,

top_level_scanner_names: BTreeSet<&'static str>,
scanner_functions: Vec<(&'static str, String)>, // (name of scanner, code)
scanner_contexts: Vec<ScannerContext>,
scanner_functions: BTreeMap<&'static str, String>, // (name of scanner, code)
scanner_contexts: BTreeMap<&'static str, ScannerContext>,
/// Contextual keywords are not scanned directly but they also have
/// a dedicated token kind and are synthesized by the parser.
// ("<identifier_scanner_name>;<literal_value>" -> version range qualifier code)
contextual_keywords: BTreeMap<String, String>,

parser_functions: Vec<(&'static str, String)>, // (name of parser, code)
parser_functions: BTreeMap<&'static str, String>, // (name of parser, code)

#[serde(skip)]
scanner_contexts_map: BTreeMap<&'static str, ScannerContext>,
top_level_scanner_names: BTreeSet<&'static str>,
#[serde(skip)]
all_scanners: BTreeMap<&'static str, ScannerDefinitionRef>,
#[serde(skip)]
current_context_name: &'static str,
}

#[derive(Serialize)]
#[derive(Default, Serialize)]
struct ScannerContext {
name: &'static str,
#[serde(skip)]
scanner_definitions: BTreeSet<&'static str>,
alpha_literal_scanner: String,
Expand Down Expand Up @@ -141,16 +143,7 @@ impl CodeGenerator {

fn set_current_context(&mut self, name: &'static str) {
self.current_context_name = name;
self.scanner_contexts_map
.entry(name)
.or_insert_with(|| ScannerContext {
name,
scanner_definitions: Default::default(),
alpha_literal_scanner: "".to_string(),
non_alpha_literal_scanner: "".to_string(),
compound_scanner_names: vec![],
delimiters: Default::default(),
});
self.scanner_contexts.entry(name).or_default();
}
}

Expand All @@ -165,25 +158,19 @@ impl GrammarVisitor for CodeGenerator {
.map(|(name, scanner)| (*name, scanner.to_scanner_code().to_string()))
.collect();

self.parser_functions.sort_by(|a, b| a.0.cmp(b.0));
self.scanner_functions.sort_by(|a, b| a.0.cmp(b.0));

for context in self.scanner_contexts_map.values_mut() {
for context in self.scanner_contexts.values_mut() {
let mut alpha_literal_trie = Trie::new();
let mut non_alpha_literal_trie = Trie::new();
let mut have_identifier_scanner = false;

// Dr Hackity McHackerson
// Identifier at the end so it doesn't grab other things.
// Not a problem when we switch to a DFA.
let have_identifier_scanner = context.scanner_definitions.remove("Identifier");
for scanner_name in &context.scanner_definitions {
let scanner = &self.all_scanners[*scanner_name];
let literals = scanner.literals();
if literals.is_empty() {
// Dr Hackity McHackerson
// Identifier at the end so it doesn't grab other things.
// Not a problem when we switch to a DFA.
if scanner_name == &"Identifier" {
have_identifier_scanner = true;
} else {
context.compound_scanner_names.push(scanner_name);
}
context.compound_scanner_names.push(scanner_name);
} else {
for literal in literals {
// This is good enough until we switch to a DFA
Expand All @@ -198,15 +185,12 @@ impl GrammarVisitor for CodeGenerator {
context.alpha_literal_scanner = alpha_literal_trie.to_scanner_code().to_string();
context.non_alpha_literal_scanner =
non_alpha_literal_trie.to_scanner_code().to_string();

if have_identifier_scanner {
context.compound_scanner_names.push("Identifier");
}
}

self.scanner_contexts = mem::take(&mut self.scanner_contexts_map)
.into_values()
.collect();

// Tidy up state that is no longer needed.
self.all_scanners.clear();
self.current_context_name = "";
Expand All @@ -221,15 +205,15 @@ impl GrammarVisitor for CodeGenerator {
self.production_kinds.insert(parser.name());
self.rule_kinds.insert(parser.name());
self.trivia_kinds.insert(parser.name());
self.parser_functions.push((
self.parser_functions.insert(
parser.name(),
{
let code = parser.to_parser_code();
let rule_kind = format_ident!("{}", parser.name());
quote! { #code.with_kind(RuleKind::#rule_kind) }
}
.to_string(),
))
);
}

fn parser_definition_enter(&mut self, parser: &ParserDefinitionRef) {
Expand All @@ -239,14 +223,14 @@ impl GrammarVisitor for CodeGenerator {
self.production_kinds.insert(parser.name());
self.rule_kinds.insert(parser.name());
let code = parser.to_parser_code();
self.parser_functions.push((
self.parser_functions.insert(
parser.name(),
{
let rule_kind = format_ident!("{}", parser.name());
quote! { #code.with_kind(RuleKind::#rule_kind) }
}
.to_string(),
));
);
}
}

Expand All @@ -257,15 +241,15 @@ impl GrammarVisitor for CodeGenerator {
for (_, _, name, _) in &parser.node().operators {
self.rule_kinds.insert(name);
}
self.parser_functions.push((
self.parser_functions.insert(
parser.name(),
{
let code = parser.to_parser_code();
let rule_kind = format_ident!("{}", parser.name());
quote! { #code.with_kind(RuleKind::#rule_kind) }
}
.to_string(),
))
);
}

fn scanner_definition_node_enter(&mut self, node: &ScannerDefinitionNode) {
Expand All @@ -286,11 +270,28 @@ impl GrammarVisitor for CodeGenerator {
ParserDefinitionNode::ScannerDefinition(scanner, _) => {
self.top_level_scanner_names.insert(scanner.name());
self.token_kinds.insert(scanner.name());
self.scanner_contexts_map
self.scanner_contexts
.get_mut(&self.current_context_name)
.unwrap()
.scanner_definitions
.insert(scanner.name());

if let Some(contextual_keyword) = scanner.as_contextual_keyword() {
let keyword = format_ident!("{}", scanner.name());
// NOTE: This does not support disjoint version ranges yet,
// e.g. `0.4.21..0.5 or 0.6..0.7` (ranges are &&'ed together for now)
let code = contextual_keyword
.version_quality_ranges
.wrap_code(quote! { Some(TokenKind::#keyword) }, Some(quote! { None }))
.to_string();

// The real key is the (scanner_name, literal) tuple but we can't serialize that,
// see https://github.com/Keats/tera/issues/522
self.contextual_keywords.insert(
[contextual_keyword.ident_scanner, contextual_keyword.literal].join(";"),
code,
);
}
}
// Collect delimiters for each context
ParserDefinitionNode::DelimitedBy(open, _, close, _) => {
Expand All @@ -303,7 +304,7 @@ impl GrammarVisitor for CodeGenerator {
};

let delimiters = &mut self
.scanner_contexts_map
.scanner_contexts
.get_mut(&self.current_context_name)
.unwrap()
.delimiters;
Expand Down
86 changes: 85 additions & 1 deletion crates/codegen/parser/generator/src/scanner_definition.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use std::collections::BTreeSet;

use codegen_grammar::{ScannerDefinitionNode, ScannerDefinitionRef};
use codegen_grammar::{ScannerDefinitionNode, ScannerDefinitionRef, VersionQualityRange};
use inflector::Inflector;
use proc_macro2::TokenStream;
use quote::{format_ident, quote};
Expand All @@ -10,6 +10,7 @@ use super::parser_definition::VersionQualityRangeVecExtensions;
pub trait ScannerDefinitionExtensions {
fn to_scanner_code(&self) -> TokenStream;
fn literals(&self) -> Vec<String>;
fn as_contextual_keyword(&self) -> Option<ContextualKeywordScanner>;
}

impl ScannerDefinitionExtensions for ScannerDefinitionRef {
Expand All @@ -24,11 +25,15 @@ impl ScannerDefinitionExtensions for ScannerDefinitionRef {
vec![]
}
}
/// Returns the contextual keyword described by this scanner definition, if any,
/// by delegating to `as_contextual_keyword` on the node returned by `self.node()`.
fn as_contextual_keyword(&self) -> Option<ContextualKeywordScanner> {
self.node().as_contextual_keyword()
}
}

pub trait ScannerDefinitionNodeExtensions {
fn to_scanner_code(&self) -> TokenStream;
fn literals(&self, accum: &mut BTreeSet<String>) -> bool;
fn as_contextual_keyword(&self) -> Option<ContextualKeywordScanner>;
}

impl ScannerDefinitionNodeExtensions for ScannerDefinitionNode {
Expand All @@ -40,6 +45,10 @@ impl ScannerDefinitionNodeExtensions for ScannerDefinitionNode {
accum.insert(string.clone());
true
}
ScannerDefinitionNode::ContextualKeyword(string, ..) => {
accum.insert(string.to_string());
true
}
ScannerDefinitionNode::Choice(nodes, _) => nodes
.iter()
.fold(true, |result, node| node.literals(accum) && result),
Expand Down Expand Up @@ -121,6 +130,12 @@ impl ScannerDefinitionNodeExtensions for ScannerDefinitionNode {
quote! { scan_chars!(input, #(#chars),*) }
}

ScannerDefinitionNode::ContextualKeyword(_value, _scanner, _) => {
// Contextual keywords are not scanned independently; instead, the parser
// upgrades a token of the underlying scanner to the keyword.
quote! { false }
}

ScannerDefinitionNode::ScannerDefinition(scanner_definition, _) => {
let name = scanner_definition.name();
let snake_case = name.to_snake_case();
Expand All @@ -129,4 +144,73 @@ impl ScannerDefinitionNodeExtensions for ScannerDefinitionNode {
}
}
}

/// Extracts the contextual keyword described by this scanner definition node, if any.
///
/// Yields `Some` when the node is a contextual keyword, a versioned node wrapping a
/// contextual keyword, or a choice containing at least one of those; yields `None`
/// for every other shape.
///
/// Contextual keyword scanners are special: they are never scanned independently,
/// the parser synthesizes them from the underlying scanner instead.
///
/// # Panics
///
/// Panics if the keyword is not defined on top of a scanner definition reference, or
/// if the alternatives of a choice disagree on the underlying scanner or the literal.
fn as_contextual_keyword(&self) -> Option<ContextualKeywordScanner> {
    // Reduce every keyword-bearing shape to its three ingredients; all other
    // shapes leave the function early.
    let (literal, underlying, version_quality_ranges) = match self {
        ScannerDefinitionNode::ContextualKeyword(literal, underlying, _) => {
            // An unversioned keyword carries no version ranges.
            (literal, underlying, vec![])
        }
        ScannerDefinitionNode::Versioned(inner, ranges, _) => {
            let ScannerDefinitionNode::ContextualKeyword(literal, underlying, _) = inner.as_ref()
            else {
                return None;
            };
            (literal, underlying, ranges.clone())
        }
        ScannerDefinitionNode::Choice(alternatives, _) => {
            // Merge all keyword alternatives into one description, accumulating
            // their version ranges; they must agree on scanner and literal.
            return alternatives
                .iter()
                .filter_map(Self::as_contextual_keyword)
                .reduce(|mut merged, next| {
                    assert_eq!(
                        merged.ident_scanner, next.ident_scanner,
                        "Contextual keywords must have the same underlying scanner definition"
                    );
                    assert_eq!(
                        merged.literal, next.literal,
                        "Contextual keyword definition must have the same literal"
                    );

                    merged
                        .version_quality_ranges
                        .extend(next.version_quality_ranges);

                    merged
                });
        }
        _ => return None,
    };

    // The keyword has to be layered on a reference to another scanner definition.
    let ScannerDefinitionNode::ScannerDefinition(def, ..) = underlying.as_ref() else {
        unreachable!("Contextual keywords must be defined using an underlying scanner definition")
    };

    Some(ContextualKeywordScanner {
        ident_scanner: def.name(),
        literal,
        version_quality_ranges,
    })
}
}

/// Description of a contextual keyword extracted from a scanner definition:
/// the scanner it is layered on, its literal text, and the version ranges
/// in which the literal acts as a keyword.
pub struct ContextualKeywordScanner {
/// Name of the underlying scanner
pub ident_scanner: &'static str,
/// Literal of the contextual keyword
pub literal: &'static str,
/// Version ranges at which the literal is a contextual keyword
pub version_quality_ranges: Vec<VersionQualityRange>,
}
Loading