From 4e8f4d0be1a591f88dc49e1f924024b87cf06a8d Mon Sep 17 00:00:00 2001 From: Geoffroy Couprie Date: Sun, 8 Dec 2024 14:50:01 +0100 Subject: [PATCH 1/8] initialize nom-language --- Cargo.toml | 2 +- nom-language/Cargo.toml | 11 +++++++++++ nom-language/src/error.rs | 0 nom-language/src/lib.rs | 8 ++++++++ 4 files changed, 20 insertions(+), 1 deletion(-) create mode 100644 nom-language/Cargo.toml create mode 100644 nom-language/src/error.rs create mode 100644 nom-language/src/lib.rs diff --git a/Cargo.toml b/Cargo.toml index d9c018d2a..9b25e2277 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -142,4 +142,4 @@ coveralls = { repository = "Geal/nom", branch = "main", service = "github" } maintenance = { status = "actively-developed" } [workspace] -members = [".", "benchmarks/"] +members = [".", "benchmarks/", "nom-language"] diff --git a/nom-language/Cargo.toml b/nom-language/Cargo.toml new file mode 100644 index 000000000..0503cd7d6 --- /dev/null +++ b/nom-language/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "nom-language" +version = "0.0.1" +authors = ["contact@geoffroycouprie.com"] +description = "Language parsing focused combinators for the nom parser library" +edition = "2021" +license = "MIT" +repository = "https://github.com/rust-bakery/nom" + +[dependencies] +nom = { path = "..", version = "8.0.0-alpha2" } \ No newline at end of file diff --git a/nom-language/src/error.rs b/nom-language/src/error.rs new file mode 100644 index 000000000..e69de29bb diff --git a/nom-language/src/lib.rs b/nom-language/src/lib.rs new file mode 100644 index 000000000..01b2862e6 --- /dev/null +++ b/nom-language/src/lib.rs @@ -0,0 +1,8 @@ +//! # Language parsing combinators for the nom parser combinators library +//! +//! nom is a parser combinator library with a focus on safe parsing, +//! streaming patterns, and zero copy. +//! While nom provides general purpose combinators, this crate is targeted +//! at language parsing. 
+ +pub mod error; From c8c03131a6b17442142742e2d09a8760efafae00 Mon Sep 17 00:00:00 2001 From: Geoffroy Couprie Date: Sun, 8 Dec 2024 15:06:53 +0100 Subject: [PATCH 2/8] move VerboseError to nom-language --- Cargo.toml | 2 +- benchmarks/Cargo.toml | 1 + benchmarks/benches/json.rs | 3 +- examples/json.rs | 3 +- examples/s_expression.rs | 3 +- nom-language/src/error.rs | 262 +++++++++++++++++++++++++++++++++++++ src/error.rs | 241 +--------------------------------- src/traits.rs | 20 --- tests/issues.rs | 21 --- 9 files changed, 271 insertions(+), 285 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 9b25e2277..ec1933b31 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -41,7 +41,7 @@ default-features = false [dev-dependencies] doc-comment = "0.3" proptest = "=1.0.0" - +nom-language = { path = "./nom-language" } [package.metadata.docs.rs] features = ["alloc", "std", "docsrs"] diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml index 689630cc4..50caef2d9 100644 --- a/benchmarks/Cargo.toml +++ b/benchmarks/Cargo.toml @@ -56,3 +56,4 @@ harness = false [dev-dependencies] codspeed-criterion-compat = "2.4.1" +nom-language = { path = "../nom-language" } diff --git a/benchmarks/benches/json.rs b/benchmarks/benches/json.rs index 0c6babbde..805f4d50e 100644 --- a/benchmarks/benches/json.rs +++ b/benchmarks/benches/json.rs @@ -7,13 +7,14 @@ use nom::{ bytes::{tag, take}, character::{anychar, char, multispace0, none_of}, combinator::{map, map_opt, map_res, value, verify}, - error::{Error, ErrorKind, FromExternalError, ParseError, VerboseError}, + error::{Error, ErrorKind, FromExternalError, ParseError}, multi::{fold, separated_list0}, number::double, number::recognize_float, sequence::{delimited, preceded, separated_pair}, Check, Complete, Emit, IResult, Mode, OutputM, Parser, }; +use nom_language::error::VerboseError; use std::{collections::HashMap, marker::PhantomData, num::ParseIntError}; diff --git a/examples/json.rs b/examples/json.rs index d47d99b36..85003b053 
100644 --- a/examples/json.rs +++ b/examples/json.rs @@ -5,12 +5,13 @@ use nom::{ bytes::complete::{escaped, tag, take_while}, character::complete::{alphanumeric1 as alphanumeric, char, one_of}, combinator::{cut, map, opt, value}, - error::{context, convert_error, ContextError, ErrorKind, ParseError, VerboseError}, + error::{context, ContextError, ErrorKind, ParseError}, multi::separated_list0, number::complete::double, sequence::{delimited, preceded, separated_pair, terminated}, Err, IResult, Parser, }; +use nom_language::error::{convert_error, VerboseError}; use std::collections::HashMap; use std::str; diff --git a/examples/s_expression.rs b/examples/s_expression.rs index a85513bec..e034b9c0a 100644 --- a/examples/s_expression.rs +++ b/examples/s_expression.rs @@ -9,11 +9,12 @@ use nom::{ bytes::complete::tag, character::complete::{alpha1, char, digit1, multispace0, multispace1, one_of}, combinator::{cut, map, map_res, opt}, - error::{context, VerboseError}, + error::context, multi::many, sequence::{delimited, preceded, terminated}, IResult, Parser, }; +use nom_language::error::VerboseError; /// We start by defining the types that define the shape of data that we want. /// In this case, we want something tree-like diff --git a/nom-language/src/error.rs b/nom-language/src/error.rs index e69de29bb..4d38f4293 100644 --- a/nom-language/src/error.rs +++ b/nom-language/src/error.rs @@ -0,0 +1,262 @@ +use std::fmt; + +use nom::{ + error::{ContextError, ErrorKind, FromExternalError, ParseError}, + ErrorConvert, +}; + +/// This error type accumulates errors and their position when backtracking +/// through a parse tree. 
With some post processing, +/// it can be used to display user friendly error messages +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct VerboseError { + /// List of errors accumulated by `VerboseError`, containing the affected + /// part of input data, and some context + pub errors: Vec<(I, VerboseErrorKind)>, +} + +#[derive(Clone, Debug, Eq, PartialEq)] +/// Error context for `VerboseError` +pub enum VerboseErrorKind { + /// Static string added by the `context` function + Context(&'static str), + /// Indicates which character was expected by the `char` function + Char(char), + /// Error kind given by various nom parsers + Nom(ErrorKind), +} + +impl ParseError for VerboseError { + fn from_error_kind(input: I, kind: ErrorKind) -> Self { + VerboseError { + errors: vec![(input, VerboseErrorKind::Nom(kind))], + } + } + + fn append(input: I, kind: ErrorKind, mut other: Self) -> Self { + other.errors.push((input, VerboseErrorKind::Nom(kind))); + other + } + + fn from_char(input: I, c: char) -> Self { + VerboseError { + errors: vec![(input, VerboseErrorKind::Char(c))], + } + } +} + +impl ContextError for VerboseError { + fn add_context(input: I, ctx: &'static str, mut other: Self) -> Self { + other.errors.push((input, VerboseErrorKind::Context(ctx))); + other + } +} + +impl FromExternalError for VerboseError { + /// Create a new error from an input position and an external error + fn from_external_error(input: I, kind: ErrorKind, _e: E) -> Self { + Self::from_error_kind(input, kind) + } +} + +impl fmt::Display for VerboseError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!(f, "Parse error:")?; + for (input, error) in &self.errors { + match error { + VerboseErrorKind::Nom(e) => writeln!(f, "{:?} at: {}", e, input)?, + VerboseErrorKind::Char(c) => writeln!(f, "expected '{}' at: {}", c, input)?, + VerboseErrorKind::Context(s) => writeln!(f, "in section '{}', at: {}", s, input)?, + } + } + + Ok(()) + } +} + +impl std::error::Error for 
VerboseError {} + +impl From> for VerboseError> { + fn from(value: VerboseError<&[u8]>) -> Self { + VerboseError { + errors: value + .errors + .into_iter() + .map(|(i, e)| (i.to_owned(), e)) + .collect(), + } + } +} + +impl From> for VerboseError { + fn from(value: VerboseError<&str>) -> Self { + VerboseError { + errors: value + .errors + .into_iter() + .map(|(i, e)| (i.to_owned(), e)) + .collect(), + } + } +} + +impl ErrorConvert> for VerboseError<(I, usize)> { + fn convert(self) -> VerboseError { + VerboseError { + errors: self.errors.into_iter().map(|(i, e)| (i.0, e)).collect(), + } + } +} + +impl ErrorConvert> for VerboseError { + fn convert(self) -> VerboseError<(I, usize)> { + VerboseError { + errors: self.errors.into_iter().map(|(i, e)| ((i, 0), e)).collect(), + } + } +} + +/// Transforms a `VerboseError` into a trace with input position information +/// +/// The errors contain references to input data that must come from `input`, +/// because nom calculates byte offsets between them +pub fn convert_error>(input: I, e: VerboseError) -> String { + use nom::Offset; + use std::fmt::Write; + + let mut result = String::new(); + + for (i, (substring, kind)) in e.errors.iter().enumerate() { + let offset = input.offset(substring); + + if input.is_empty() { + match kind { + VerboseErrorKind::Char(c) => { + write!(&mut result, "{}: expected '{}', got empty input\n\n", i, c) + } + VerboseErrorKind::Context(s) => write!(&mut result, "{}: in {}, got empty input\n\n", i, s), + VerboseErrorKind::Nom(e) => write!(&mut result, "{}: in {:?}, got empty input\n\n", i, e), + } + } else { + let prefix = &input.as_bytes()[..offset]; + + // Count the number of newlines in the first `offset` bytes of input + let line_number = prefix.iter().filter(|&&b| b == b'\n').count() + 1; + + // Find the line that includes the subslice: + // Find the *last* newline before the substring starts + let line_begin = prefix + .iter() + .rev() + .position(|&b| b == b'\n') + .map(|pos| offset - pos) + 
.unwrap_or(0); + + // Find the full line after that newline + let line = input[line_begin..] + .lines() + .next() + .unwrap_or(&input[line_begin..]) + .trim_end(); + + // The (1-indexed) column number is the offset of our substring into that line + let column_number = line.offset(substring) + 1; + + match kind { + VerboseErrorKind::Char(c) => { + if let Some(actual) = substring.chars().next() { + write!( + &mut result, + "{i}: at line {line_number}:\n\ + {line}\n\ + {caret:>column$}\n\ + expected '{expected}', found {actual}\n\n", + i = i, + line_number = line_number, + line = line, + caret = '^', + column = column_number, + expected = c, + actual = actual, + ) + } else { + write!( + &mut result, + "{i}: at line {line_number}:\n\ + {line}\n\ + {caret:>column$}\n\ + expected '{expected}', got end of input\n\n", + i = i, + line_number = line_number, + line = line, + caret = '^', + column = column_number, + expected = c, + ) + } + } + VerboseErrorKind::Context(s) => write!( + &mut result, + "{i}: at line {line_number}, in {context}:\n\ + {line}\n\ + {caret:>column$}\n\n", + i = i, + line_number = line_number, + context = s, + line = line, + caret = '^', + column = column_number, + ), + VerboseErrorKind::Nom(e) => write!( + &mut result, + "{i}: at line {line_number}, in {nom_err:?}:\n\ + {line}\n\ + {caret:>column$}\n\n", + i = i, + line_number = line_number, + nom_err = e, + line = line, + caret = '^', + column = column_number, + ), + } + } + // Because `write!` to a `String` is infallible, this `unwrap` is fine. 
+ .unwrap(); + } + + result +} + +#[test] +fn convert_error_panic() { + use nom::character::complete::char; + use nom::IResult; + + let input = ""; + + let _result: IResult<_, _, VerboseError<&str>> = char('x')(input); +} + +#[test] +fn issue_1027_convert_error_panic_nonempty() { + use nom::character::complete::char; + use nom::sequence::pair; + use nom::Err; + use nom::IResult; + use nom::Parser; + + let input = "a"; + + let result: IResult<_, _, VerboseError<&str>> = pair(char('a'), char('b')).parse(input); + let err = match result.unwrap_err() { + Err::Error(e) => e, + _ => unreachable!(), + }; + + let msg = convert_error(input, err); + assert_eq!( + msg, + "0: at line 1:\na\n ^\nexpected \'b\', got end of input\n\n" + ); +} diff --git a/src/error.rs b/src/error.rs index f4072c3a4..bec263b53 100644 --- a/src/error.rs +++ b/src/error.rs @@ -15,7 +15,7 @@ use crate::internal::IResult; /// This trait must be implemented by the error type of a nom parser. /// /// There are already implementations of it for `(Input, ErrorKind)` -/// and `VerboseError`. +/// and `Error`. /// /// It provides methods to create an error from some combinators, /// and combine existing errors in combinators like `alt`. @@ -212,117 +212,6 @@ pub fn append_error>(input: I, kind: ErrorKind, other: E) -> E::append(input, kind, other) } -/// This error type accumulates errors and their position when backtracking -/// through a parse tree. 
With some post processing (cf `examples/json.rs`), -/// it can be used to display user friendly error messages -#[cfg(feature = "alloc")] -#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] -#[derive(Clone, Debug, Eq, PartialEq)] -pub struct VerboseError { - /// List of errors accumulated by `VerboseError`, containing the affected - /// part of input data, and some context - pub errors: crate::lib::std::vec::Vec<(I, VerboseErrorKind)>, -} - -#[cfg(feature = "alloc")] -#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] -#[derive(Clone, Debug, Eq, PartialEq)] -/// Error context for `VerboseError` -pub enum VerboseErrorKind { - /// Static string added by the `context` function - Context(&'static str), - /// Indicates which character was expected by the `char` function - Char(char), - /// Error kind given by various nom parsers - Nom(ErrorKind), -} - -#[cfg(feature = "alloc")] -#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] -impl ParseError for VerboseError { - fn from_error_kind(input: I, kind: ErrorKind) -> Self { - VerboseError { - errors: vec![(input, VerboseErrorKind::Nom(kind))], - } - } - - fn append(input: I, kind: ErrorKind, mut other: Self) -> Self { - other.errors.push((input, VerboseErrorKind::Nom(kind))); - other - } - - fn from_char(input: I, c: char) -> Self { - VerboseError { - errors: vec![(input, VerboseErrorKind::Char(c))], - } - } -} - -#[cfg(feature = "alloc")] -#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] -impl ContextError for VerboseError { - fn add_context(input: I, ctx: &'static str, mut other: Self) -> Self { - other.errors.push((input, VerboseErrorKind::Context(ctx))); - other - } -} - -#[cfg(feature = "alloc")] -#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] -impl FromExternalError for VerboseError { - /// Create a new error from an input position and an external error - fn from_external_error(input: I, kind: ErrorKind, _e: E) -> Self { - Self::from_error_kind(input, kind) - } 
-} - -#[cfg(feature = "alloc")] -impl fmt::Display for VerboseError { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - writeln!(f, "Parse error:")?; - for (input, error) in &self.errors { - match error { - VerboseErrorKind::Nom(e) => writeln!(f, "{:?} at: {}", e, input)?, - VerboseErrorKind::Char(c) => writeln!(f, "expected '{}' at: {}", c, input)?, - VerboseErrorKind::Context(s) => writeln!(f, "in section '{}', at: {}", s, input)?, - } - } - - Ok(()) - } -} - -#[cfg(feature = "std")] -impl std::error::Error for VerboseError {} - -#[cfg(feature = "alloc")] -#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] -impl From> for VerboseError> { - fn from(value: VerboseError<&[u8]>) -> Self { - VerboseError { - errors: value - .errors - .into_iter() - .map(|(i, e)| (i.to_owned(), e)) - .collect(), - } - } -} - -#[cfg(feature = "alloc")] -#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] -impl From> for VerboseError { - fn from(value: VerboseError<&str>) -> Self { - VerboseError { - errors: value - .errors - .into_iter() - .map(|(i, e)| (i.to_owned(), e)) - .collect(), - } - } -} - /// Create a new error from an input position, a static string and an existing error. 
/// This is used mainly in the [context] combinator, to add user friendly information /// to errors when backtracking through a parse tree @@ -360,123 +249,6 @@ where } } -/// Transforms a `VerboseError` into a trace with input position information -/// -/// The errors contain references to input data that must come from `input`, -/// because nom calculates byte offsets between them -#[cfg(feature = "alloc")] -#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] -pub fn convert_error>( - input: I, - e: VerboseError, -) -> crate::lib::std::string::String { - use crate::lib::std::fmt::Write; - use crate::traits::Offset; - - let mut result = crate::lib::std::string::String::new(); - - for (i, (substring, kind)) in e.errors.iter().enumerate() { - let offset = input.offset(substring); - - if input.is_empty() { - match kind { - VerboseErrorKind::Char(c) => { - write!(&mut result, "{}: expected '{}', got empty input\n\n", i, c) - } - VerboseErrorKind::Context(s) => write!(&mut result, "{}: in {}, got empty input\n\n", i, s), - VerboseErrorKind::Nom(e) => write!(&mut result, "{}: in {:?}, got empty input\n\n", i, e), - } - } else { - let prefix = &input.as_bytes()[..offset]; - - // Count the number of newlines in the first `offset` bytes of input - let line_number = prefix.iter().filter(|&&b| b == b'\n').count() + 1; - - // Find the line that includes the subslice: - // Find the *last* newline before the substring starts - let line_begin = prefix - .iter() - .rev() - .position(|&b| b == b'\n') - .map(|pos| offset - pos) - .unwrap_or(0); - - // Find the full line after that newline - let line = input[line_begin..] 
- .lines() - .next() - .unwrap_or(&input[line_begin..]) - .trim_end(); - - // The (1-indexed) column number is the offset of our substring into that line - let column_number = line.offset(substring) + 1; - - match kind { - VerboseErrorKind::Char(c) => { - if let Some(actual) = substring.chars().next() { - write!( - &mut result, - "{i}: at line {line_number}:\n\ - {line}\n\ - {caret:>column$}\n\ - expected '{expected}', found {actual}\n\n", - i = i, - line_number = line_number, - line = line, - caret = '^', - column = column_number, - expected = c, - actual = actual, - ) - } else { - write!( - &mut result, - "{i}: at line {line_number}:\n\ - {line}\n\ - {caret:>column$}\n\ - expected '{expected}', got end of input\n\n", - i = i, - line_number = line_number, - line = line, - caret = '^', - column = column_number, - expected = c, - ) - } - } - VerboseErrorKind::Context(s) => write!( - &mut result, - "{i}: at line {line_number}, in {context}:\n\ - {line}\n\ - {caret:>column$}\n\n", - i = i, - line_number = line_number, - context = s, - line = line, - caret = '^', - column = column_number, - ), - VerboseErrorKind::Nom(e) => write!( - &mut result, - "{i}: at line {line_number}, in {nom_err:?}:\n\ - {line}\n\ - {caret:>column$}\n\n", - i = i, - line_number = line_number, - nom_err = e, - line = line, - caret = '^', - column = column_number, - ), - } - } - // Because `write!` to a `String` is infallible, this `unwrap` is fine. 
- .unwrap(); - } - - result -} - /// Indicates which parser returned an error #[rustfmt::skip] #[derive(Debug,PartialEq,Eq,Hash,Clone,Copy)] @@ -791,17 +563,6 @@ mod tests { ); } - #[cfg(feature = "alloc")] - #[test] - fn convert_error_panic() { - use crate::character::complete::char; - use crate::internal::IResult; - - let input = ""; - - let _result: IResult<_, _, VerboseError<&str>> = char('x')(input); - } - #[cfg(feature = "alloc")] #[test] fn clone_error() { diff --git a/src/traits.rs b/src/traits.rs index 1313953ec..f778c3601 100644 --- a/src/traits.rs +++ b/src/traits.rs @@ -1309,26 +1309,6 @@ impl ErrorConvert> for error::Error { } } -#[cfg(feature = "alloc")] -#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] -impl ErrorConvert> for error::VerboseError<(I, usize)> { - fn convert(self) -> error::VerboseError { - error::VerboseError { - errors: self.errors.into_iter().map(|(i, e)| (i.0, e)).collect(), - } - } -} - -#[cfg(feature = "alloc")] -#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] -impl ErrorConvert> for error::VerboseError { - fn convert(self) -> error::VerboseError<(I, usize)> { - error::VerboseError { - errors: self.errors.into_iter().map(|(i, e)| ((i, 0), e)).collect(), - } - } -} - impl ErrorConvert<()> for () { fn convert(self) {} } diff --git a/tests/issues.rs b/tests/issues.rs index ecff7b3a2..2b1923d6a 100644 --- a/tests/issues.rs +++ b/tests/issues.rs @@ -178,27 +178,6 @@ fn issue_many_m_n_with_zeros() { assert_eq!(parser.parse("aaa"), Ok(("aaa", vec!()))); } -#[test] -fn issue_1027_convert_error_panic_nonempty() { - use nom::character::complete::char; - use nom::error::{convert_error, VerboseError}; - use nom::sequence::pair; - - let input = "a"; - - let result: IResult<_, _, VerboseError<&str>> = pair(char('a'), char('b')).parse(input); - let err = match result.unwrap_err() { - Err::Error(e) => e, - _ => unreachable!(), - }; - - let msg = convert_error(input, err); - assert_eq!( - msg, - "0: at line 1:\na\n 
^\nexpected \'b\', got end of input\n\n" - ); -} - #[test] fn issue_1231_bits_expect_fn_closure() { use nom::bits::{bits, complete::take}; From 6c124693c10dd3e932013fdfa0e1690af8b53af7 Mon Sep 17 00:00:00 2001 From: cenodis <57576911+cenodis@users.noreply.github.com> Date: Sun, 8 Dec 2024 17:09:15 +0100 Subject: [PATCH 3/8] Precedence parsing (#1362) * Initial prototype * Update docs Remove unused code * More doc updates * Add feature flags for Vec * Add basic tests * Fix formatting * Add precedence to choosing_a_combinator.md * Fix typo * Minor refractoring * Update docs * Change parameter order * Add alloc feature to the entire precedence module The parser really cant work without it and the helpers dont make much sense without the parser. * Use fail parser to express "no operators of this type" * Document evaluation order * Better documentation for parameters * Fix precedence in documentation * Fix doc formatting * Fix typos * Use map_res when parsing integers * Example test for expressions with function calls and AST generation * Typo * Make evaluation a bit easier to read * Update expression_ast * Update expression_ast doc * Implement ternary operator in expression_ast * Shorten ast nodes * Implement some tests for parser failures * Update feature flags for docs * Properly append errors * Properly bubble up non Error errors * Split operators into 3 distinct types to help with exhaustiveness checks. 
--------- Co-authored-by: Geoffroy Couprie --- Cargo.toml | 4 + doc/choosing_a_combinator.md | 1 + src/error.rs | 3 + src/lib.rs | 2 + src/precedence/mod.rs | 379 +++++++++++++++++++++++++++++++++++ src/precedence/tests.rs | 75 +++++++ tests/expression_ast.rs | 158 +++++++++++++++ 7 files changed, 622 insertions(+) create mode 100644 src/precedence/mod.rs create mode 100644 src/precedence/tests.rs create mode 100644 tests/expression_ast.rs diff --git a/Cargo.toml b/Cargo.toml index ec1933b31..d88e70a44 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -66,6 +66,10 @@ name = "css" [[test]] name = "custom_errors" +[[test]] +name = "expression_ast" +required-features = ["alloc"] + [[test]] name = "float" diff --git a/doc/choosing_a_combinator.md b/doc/choosing_a_combinator.md index 3363f63c5..dfdee0940 100644 --- a/doc/choosing_a_combinator.md +++ b/doc/choosing_a_combinator.md @@ -106,6 +106,7 @@ The following parsers could be found on [docs.rs number section](https://docs.rs - [`escaped`](https://docs.rs/nom/latest/nom/bytes/complete/fn.escaped.html): Matches a byte string with escaped characters - [`escaped_transform`](https://docs.rs/nom/latest/nom/bytes/complete/fn.escaped_transform.html): Matches a byte string with escaped characters, and returns a new string with the escaped characters replaced +- [`precedence`](https://docs.rs/nom/latest/nom/precedence/fn.precedence.html): Parses an expression with regards to operator precedence ## Binary format parsing diff --git a/src/error.rs b/src/error.rs index bec263b53..dac6a4007 100644 --- a/src/error.rs +++ b/src/error.rs @@ -310,6 +310,7 @@ pub enum ErrorKind { Fail, Many, Fold, + Precedence, } #[rustfmt::skip] @@ -373,6 +374,7 @@ pub fn error_to_u32(e: &ErrorKind) -> u32 { ErrorKind::Many => 76, ErrorKind::Fold => 77, ErrorKind::BinDigit => 78, + ErrorKind::Precedence => 79, } } @@ -438,6 +440,7 @@ impl ErrorKind { ErrorKind::Fail => "Fail", ErrorKind::Many => "Many", ErrorKind::Fold => "Fold", + ErrorKind::Precedence => 
"Precedence", } } } diff --git a/src/lib.rs b/src/lib.rs index c82715a3b..db4c8703b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -451,6 +451,8 @@ pub mod bytes; pub mod character; +pub mod precedence; + mod str; pub mod number; diff --git a/src/precedence/mod.rs b/src/precedence/mod.rs new file mode 100644 index 000000000..c518fdff3 --- /dev/null +++ b/src/precedence/mod.rs @@ -0,0 +1,379 @@ +//! Combinators to parse expressions with operator precedence. +#![cfg(feature="alloc")] +#![cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] + +#[cfg(test)] +mod tests; + +use crate::error::{ErrorKind, FromExternalError, ParseError}; +use crate::lib::std::vec::Vec; +use crate::{Err, IResult, Parser}; + +/// An unary operator. +pub struct Unary { + value: V, + precedence: Q, +} + +/// A binary operator. +pub struct Binary { + value: V, + precedence: Q, + assoc: Assoc, +} + +/// A single evaluation step. +pub enum Operation { + /// A prefix operation. + Prefix(P1, O), + /// A postfix operation. + Postfix(O, P2), + /// A binary operation. + Binary(O, P3, O), +} + +/// Associativity for binary operators. +#[derive(Copy, Clone, PartialEq, Eq)] +pub enum Assoc { + /// Left associative. + Left, + /// Right associative. + Right, +} + +/// Element for operator stack. +enum Operator { + Prefix(P1, Q), + Postfix(P2, Q), + Binary(P3, Q, Assoc), +} + +impl Operator +where + Q: Ord + Copy, +{ + fn precedence(&self) -> Q { + match self { + Operator::Prefix(_, p) => *p, + Operator::Postfix(_, p) => *p, + Operator::Binary(_, p, _) => *p, + } + } + + fn is_postfix(&self) -> bool { + match self { + Operator::Postfix(_, _) => true, + _ => false, + } + } +} + +/// Runs the inner parser and transforms the result into an unary operator with the given precedence. +/// +/// Intended for use with [precedence]. +/// # Arguments +/// * `precedence` The precedence of the operator. +/// * `parser` The parser to apply. 
+pub fn unary_op( + precedence: Q, + mut parser: P, +) -> impl FnMut(I) -> IResult, E> +where + P: Parser, + Q: Ord + Copy, +{ + move |input| match parser.parse(input) { + Ok((i, value)) => Ok(( + i, + Unary { + value, + precedence, + }, + )), + Err(e) => Err(e), + } +} + +/// Runs the inner parser and transforms the result into a binary operator with the given precedence and associativity. +/// +/// Intended for use with [precedence]. +/// # Arguments +/// * `precedence` The precedence of the operator. +/// * `assoc` The associativity of the operator. +/// * `parser` The parser to apply. +pub fn binary_op( + precedence: Q, + assoc: Assoc, + mut parser: P, +) -> impl FnMut(I) -> IResult, E> +where + P: Parser, + Q: Ord + Copy, +{ + move |input| match parser.parse(input) { + Ok((i, value)) => Ok(( + i, + Binary { + value, + precedence, + assoc, + }, + )), + Err(e) => Err(e), + } +} + +/// Parses an expression with operator precedence. +/// +/// Supports prefix, postfix and binary operators. Operators are applied in ascending precedence. +/// +/// The parser will track its current position inside the expression and call the respective +/// operand/operator parsers. The prefix and postfix parsers are called repeatedly until they fail before +/// execution moves on to the operand or binary parser. +/// +/// Expressions are folded as soon as possible. The result will be reused as another operand. After the +/// expression has been read completely any remaining operations are folded and the resulting, single +/// operand is returned as the result. +/// +/// It will return `Err(Err::Error((_, ErrorKind::Precedence)))` if: +/// * the `fold` function returns an `Err`. +/// * more than one or no operands remain after the expression has been evaluated completely. +/// * the input does not match the pattern: `prefix* operand postfix* (binary prefix* operand postfix*)*` +/// +/// # Arguments +/// * `prefix` Parser for prefix unary operators. 
+/// * `postfix` Parser for postfix unary operators. +/// * `binary` Parser for binary operators. +/// * `operand` Parser for operands. +/// * `fold` Function that evaluates a single operation and returns the result. +/// +/// # Example +/// ```rust +/// # use nom::{Err, error::{Error, ErrorKind}, IResult}; +/// use nom::precedence::{precedence, unary_op, binary_op, Assoc, Operation}; +/// use nom::character::complete::digit1; +/// use nom::combinator::{map_res, fail}; +/// use nom::sequence::delimited; +/// use nom::bytes::complete::tag; +/// use nom::branch::alt; +/// +/// fn parser(i: &str) -> IResult<&str, i64> { +/// precedence( +/// unary_op(1, tag("-")), +/// fail, +/// alt(( +/// binary_op(2, Assoc::Left, tag("*")), +/// binary_op(2, Assoc::Left, tag("/")), +/// binary_op(3, Assoc::Left, tag("+")), +/// binary_op(3, Assoc::Left, tag("-")), +/// )), +/// alt(( +/// map_res(digit1, |s: &str| s.parse::()), +/// delimited(tag("("), parser, tag(")")), +/// )), +/// |op: Operation<&str, &str, &str, i64>| { +/// use nom::precedence::Operation::*; +/// match op { +/// Prefix("-", o) => Ok(-o), +/// Binary(lhs, "*", rhs) => Ok(lhs * rhs), +/// Binary(lhs, "/", rhs) => Ok(lhs / rhs), +/// Binary(lhs, "+", rhs) => Ok(lhs + rhs), +/// Binary(lhs, "-", rhs) => Ok(lhs - rhs), +/// _ => Err("Invalid combination"), +/// } +/// } +/// )(i) +/// } +/// +/// assert_eq!(parser("8-2*2"), Ok(("", 4))); +/// assert_eq!(parser("4-(2+2)"), Ok(("", 0))); +/// assert_eq!(parser("3-(2*3)+7+2*2-(2*(2+4))"), Ok(("", -4))); +/// ``` +/// +/// # Evaluation order +/// This parser reads expressions from left to right and folds operations as soon as possible. This +/// behaviour is only important when using an operator grammar that allows for ambiguous expressions. +/// +/// For example, the expression `-a++**b` is ambiguous with the following precedence. 
+/// +/// | Operator | Position | Precedence | Associativity | +/// |----------|----------|------------|---------------| +/// | ** | Binary | 1 | Right | +/// | - | Prefix | 2 | N/A | +/// | ++ | Postfix | 3 | N/A | +/// +/// The expression can be parsed in two ways: `-((a++)**b)` or `((-a)++)**b`. This parser will always +/// parse it as the latter because of how it evaluates expressions: +/// * It reads, left-to-right, the first two operators `-a++`. +/// * Because the minus takes precedence over the increment it is evaluated immediately `(-a)++`. +/// * It then reads the remaining input and evaluates the increment next in order to preserve its +/// position in the expression \ +/// `((-a)++)**b`. +#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] +pub fn precedence( + mut prefix: H1, + mut postfix: H2, + mut binary: H3, + mut operand: F, + mut fold: G, +) -> impl FnMut(I) -> IResult +where + I: Clone + PartialEq, + E: ParseError + FromExternalError, + F: Parser, + G: FnMut(Operation) -> Result, + H1: Parser, E>, + H2: Parser, E>, + H3: Parser, E>, + Q: Ord + Copy, +{ + move |mut i| { + let mut operands = Vec::new(); + let mut operators = Vec::new(); + let mut i1 = i.clone(); + + 'main: loop { + 'prefix: loop { + match prefix.parse(i1.clone()) { + Err(Err::Error(_)) => break 'prefix, + Err(e) => return Err(e), + Ok((i2, o)) => { + // infinite loop check: the parser must always consume + if i2 == i1 { + return Err(Err::Error(E::from_error_kind(i1, ErrorKind::Precedence))); + } + i1 = i2; + operators.push(Operator::Prefix(o.value, o.precedence)); + } + } + } + + let (i2, o) = match operand.parse(i1.clone()) { + Ok((i, o)) => (i, o), + Err(Err::Error(e)) => return Err(Err::Error(E::append(i, ErrorKind::Precedence, e))), + Err(e) => return Err(e), + }; + i1 = i2; + operands.push(o); + + 'postfix: loop { + match postfix.parse(i1.clone()) { + Err(Err::Error(_)) => break 'postfix, + Err(e) => return Err(e), + Ok((i2, o)) => { + // infinite loop check: the 
parser must always consume + if i2 == i1 { + return Err(Err::Error(E::from_error_kind(i1, ErrorKind::Precedence))); + } + + while operators + .last() + .map(|op| op.precedence() <= o.precedence) + .unwrap_or(false) + { + let value = operands.pop().unwrap(); + let operation = match operators.pop().unwrap() { + Operator::Prefix(op, _) => Operation::Prefix(op, value), + Operator::Postfix(op, _) => Operation::Postfix(value, op), + Operator::Binary(op, _, _) => match operands.pop() { + Some(lhs) => Operation::Binary(lhs, op, value), + None => return Err(Err::Error(E::from_error_kind(i1, ErrorKind::Precedence))), + }, + }; + let result = match fold(operation) { + Err(e) => { + return Err(Err::Error(E::from_external_error( + i, + ErrorKind::Precedence, + e, + ))) + } + Ok(r) => r, + }; + operands.push(result); + } + i1 = i2; + operators.push(Operator::Postfix(o.value, o.precedence)); + } + } + } + + match binary.parse(i1.clone()) { + Err(Err::Error(_)) => break 'main, + Err(e) => return Err(e), + Ok((i2, o)) => { + while operators + .last() + .map(|op| { + op.precedence() < o.precedence + || (o.assoc == Assoc::Left && op.precedence() == o.precedence) + || (op.is_postfix()) + }) + .unwrap_or(false) + { + let value = operands.pop().unwrap(); + let operation = match operators.pop().unwrap() { + Operator::Prefix(op, _) => Operation::Prefix(op, value), + Operator::Postfix(op, _) => Operation::Postfix(value, op), + Operator::Binary(op, _, _) => match operands.pop() { + Some(lhs) => Operation::Binary(lhs, op, value), + None => return Err(Err::Error(E::from_error_kind(i1, ErrorKind::Precedence))), + }, + }; + let result = match fold(operation) { + Err(e) => { + return Err(Err::Error(E::from_external_error( + i, + ErrorKind::Precedence, + e, + ))) + } + Ok(r) => r, + }; + operands.push(result); + } + operators.push(Operator::Binary(o.value, o.precedence, o.assoc)); + i1 = i2; + } + } + + // infinite loop check: either operand or operator must consume input + if i == i1 { + return 
Err(Err::Error(E::from_error_kind(i, ErrorKind::Precedence))); + } + i = i1.clone(); + } + + while operators.len() > 0 { + let value = match operands.pop() { + Some(o) => o, + None => return Err(Err::Error(E::from_error_kind(i, ErrorKind::Precedence))), + }; + let operation = match operators.pop().unwrap() { + Operator::Prefix(op, _) => Operation::Prefix(op, value), + Operator::Postfix(op, _) => Operation::Postfix(value, op), + Operator::Binary(op, _, _) => match operands.pop() { + Some(lhs) => Operation::Binary(lhs, op, value), + None => return Err(Err::Error(E::from_error_kind(i, ErrorKind::Precedence))), + }, + }; + let result = match fold(operation) { + Ok(r) => r, + Err(e) => { + return Err(Err::Error(E::from_external_error( + i, + ErrorKind::Precedence, + e, + ))) + } + }; + operands.push(result); + } + + if operands.len() == 1 { + return Ok((i1, operands.pop().unwrap())); + } else { + return Err(Err::Error(E::from_error_kind(i, ErrorKind::Precedence))); + } + } +} diff --git a/src/precedence/tests.rs b/src/precedence/tests.rs new file mode 100644 index 000000000..f697730cf --- /dev/null +++ b/src/precedence/tests.rs @@ -0,0 +1,75 @@ +use crate::precedence::{binary_op, unary_op, Assoc, Operation}; +use crate::{ + branch::alt, + bytes::complete::tag, + character::complete::digit1, + combinator::{map_res, fail}, + internal::{Err, IResult}, + sequence::delimited, + error::ErrorKind, +}; + +#[cfg(feature = "alloc")] +use crate::precedence::precedence; + +#[cfg(feature = "alloc")] +fn parser(i: &str) -> IResult<&str, i64> { + precedence( + unary_op(1, tag("-")), + fail, + alt(( + binary_op(2, Assoc::Left, tag("*")), + binary_op(2, Assoc::Left, tag("/")), + binary_op(3, Assoc::Left, tag("+")), + binary_op(3, Assoc::Left, tag("-")), + )), + alt(( + map_res(digit1, |s: &str| s.parse::()), + delimited(tag("("), parser, tag(")")), + )), + |op: Operation<&str, (), &str, i64>| { + use crate::precedence::Operation::*; + match op { + Prefix("-", o) => Ok(-o), + Binary(lhs, 
"*", rhs) => Ok(lhs * rhs), + Binary(lhs, "/", rhs) => Ok(lhs / rhs), + Binary(lhs, "+", rhs) => Ok(lhs + rhs), + Binary(lhs, "-", rhs) => Ok(lhs - rhs), + _ => Err("Invalid combination"), + } + }, + )(i) +} + +#[test] +#[cfg(feature = "alloc")] +fn precedence_test() { + assert_eq!(parser("3"), Ok(("", 3))); + assert_eq!(parser("-3"), Ok(("", -3))); + assert_eq!(parser("4-(2*2)"), Ok(("", 0))); + assert_eq!(parser("4-2*2"), Ok(("", 0))); + assert_eq!(parser("(4-2)*2"), Ok(("", 4))); + assert_eq!(parser("2*2/1"), Ok(("", 4))); + + let a = "a"; + + assert_eq!( + parser(a), + Err(Err::Error(error_node_position!( + &a[..], + ErrorKind::Precedence, + error_position!(&a[..], ErrorKind::Tag) + ))) + ); + + let b = "3+b"; + + assert_eq!( + parser(b), + Err(Err::Error(error_node_position!( + &b[2..], + ErrorKind::Precedence, + error_position!(&b[2..], ErrorKind::Tag) + ))) + ); +} diff --git a/tests/expression_ast.rs b/tests/expression_ast.rs new file mode 100644 index 000000000..19ced17e0 --- /dev/null +++ b/tests/expression_ast.rs @@ -0,0 +1,158 @@ +use nom::{ + branch::alt, + bytes::complete::tag, + character::complete::{digit1 as digit, alphanumeric1 as alphanumeric}, + combinator::{map_res, map}, + multi::separated_list0, + sequence::delimited, + IResult, + precedence::{precedence, Assoc, binary_op, unary_op, Operation}, +}; + +// Elements of the abstract syntax tree (ast) that represents an expression. +#[derive(Debug)] +pub enum Expr { + // A number literal. + Num(i64), + // An identifier. + Iden(String), + // Arithmetic operations. Each have a left hand side (lhs) and a right hand side (rhs). + Add(Box, Box), + Sub(Box, Box), + Mul(Box, Box), + Div(Box, Box), + // The function call operation. Left is the expression the function is called on, right is the list of parameters. + Call(Box, Vec), + // The ternary operator, the expressions from left to right are: The condition, the true case, the false case. + Tern(Box, Box, Box), +} + +// Prefix operators. 
+enum PrefixOp { + Identity, // + + Negate, // - +} + +// Postfix operators. +enum PostfixOp { + // The function call operator. In addition to its own representation "()" it carries additional information that we need to keep here. + // Specifically the vector of expressions that make up the parameters. + Call(Vec), // () +} + +// Binary operators. +enum BinaryOp { + Addition, // + + Subtraction, // - + Multiplication, // * + Division, // / + // The ternary operator can contain a single expression. + Ternary(Expr), // ?: +} + +// Parser for function calls. +fn function_call(i: &str) -> IResult<&str, PostfixOp> { + map( + delimited( + tag("("), + // Subexpressions are evaluated by recursing back into the expression parser. + separated_list0(tag(","), expression), + tag(")") + ), + |v: Vec| PostfixOp::Call(v) + )(i) +} + +// The ternary operator is actually just a binary operator that contains another expression. So it can be +// handled similarly to the function call operator except its in a binary position and can only contain +// a single expression. +// +// For example the expression "a IResult<&str, BinaryOp> { + map( + delimited( + tag("?"), + expression, + tag(":") + ), + |e: Expr| BinaryOp::Ternary(e) + )(i) +} + +// The actual expression parser . +fn expression(i: &str) -> IResult<&str, Expr> { + precedence( + alt(( + unary_op(2, map(tag("+"), |_| PrefixOp::Identity)), + unary_op(2, map(tag("-"), |_| PrefixOp::Negate)), + )), + // Function calls are implemented as postfix unary operators. + unary_op(1, function_call), + alt(( + binary_op(3, Assoc::Left, alt(( + map(tag("*"), |_| BinaryOp::Multiplication), + map(tag("/"), |_| BinaryOp::Division), + ))), + binary_op(4, Assoc::Left, alt(( + map(tag("+"), |_| BinaryOp::Addition), + map(tag("-"), |_| BinaryOp::Subtraction), + ))), + // Ternary operators are just binary operators with a subexpression. 
+ binary_op(5, Assoc::Right, ternary_operator), + )), + alt(( + map_res(digit, + |s: &str| match s.parse::() { + Ok(s) => Ok(Expr::Num(s)), + Err(e) => Err(e), + } + ), + map(alphanumeric, |s: &str| Expr::Iden(s.to_string())), + delimited(tag("("), expression, tag(")")), + )), + |op: Operation| -> Result { + use nom::precedence::Operation::*; + use PrefixOp::*; + use PostfixOp::*; + use BinaryOp::*; + match op { + // The identity operator (prefix +) is ignored. + Prefix(Identity, e) => Ok(e), + + // Unary minus gets evaluated to the same representation as a multiplication with -1. + Prefix(Negate, e) => Ok(Expr::Mul(Expr::Num(-1).into(), e.into())), + + // The list of parameters are taken from the operator and placed into the ast. + Postfix(e, Call(p)) => Ok(Expr::Call(e.into(), p)), + + // Meaning is assigned to the expressions of the ternary operator during evaluation. + // The lhs becomes the condition, the contained expression is the true case, rhs the false case. + Binary(lhs, Ternary(e), rhs) => Ok(Expr::Tern(lhs.into(), e.into(), rhs.into())), + + // Raw operators get turned into their respective ast nodes. 
+ Binary(lhs, Multiplication, rhs) => Ok(Expr::Mul(lhs.into(), rhs.into())), + Binary(lhs, Division, rhs) => Ok(Expr::Div(lhs.into(), rhs.into())), + Binary(lhs, Addition, rhs) => Ok(Expr::Add(lhs.into(), rhs.into())), + Binary(lhs, Subtraction, rhs) => Ok(Expr::Sub(lhs.into(), rhs.into())), + } + } + )(i) +} + +#[test] +fn expression_test() { + assert_eq!( + expression("-2*max(2,3)-2").map(|(i, x)| (i, format!("{:?}", x))), + Ok(("", String::from("Sub(Mul(Mul(Num(-1), Num(2)), Call(Iden(\"max\"), [Num(2), Num(3)])), Num(2))"))) + ); + + assert_eq!( + expression("a?2+c:-2*2").map(|(i, x)| (i, format!("{:?}", x))), + Ok(("", String::from("Tern(Iden(\"a\"), Add(Num(2), Iden(\"c\")), Mul(Mul(Num(-1), Num(2)), Num(2)))"))) + ); +} From f1abba64a7384a29602ee0df00b4809e1fd39f2a Mon Sep 17 00:00:00 2001 From: Geoffroy Couprie Date: Sun, 8 Dec 2024 17:23:38 +0100 Subject: [PATCH 4/8] fix precedence parsing --- src/precedence/mod.rs | 38 ++++++-------- src/precedence/tests.rs | 14 ++--- tests/expression_ast.rs | 113 ++++++++++++++++++++++------------------ 3 files changed, 85 insertions(+), 80 deletions(-) diff --git a/src/precedence/mod.rs b/src/precedence/mod.rs index c518fdff3..22c9cc1ef 100644 --- a/src/precedence/mod.rs +++ b/src/precedence/mod.rs @@ -1,5 +1,5 @@ //! Combinators to parse expressions with operator precedence. 
-#![cfg(feature="alloc")] +#![cfg(feature = "alloc")] #![cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] #[cfg(test)] @@ -79,17 +79,11 @@ pub fn unary_op( mut parser: P, ) -> impl FnMut(I) -> IResult, E> where - P: Parser, + P: Parser, Q: Ord + Copy, { move |input| match parser.parse(input) { - Ok((i, value)) => Ok(( - i, - Unary { - value, - precedence, - }, - )), + Ok((i, value)) => Ok((i, Unary { value, precedence })), Err(e) => Err(e), } } @@ -107,7 +101,7 @@ pub fn binary_op( mut parser: P, ) -> impl FnMut(I) -> IResult, E> where - P: Parser, + P: Parser, Q: Ord + Copy, { move |input| match parser.parse(input) { @@ -124,7 +118,7 @@ where } /// Parses an expression with operator precedence. -/// +/// /// Supports prefix, postfix and binary operators. Operators are applied in ascending precedence. /// /// The parser will track its current position inside the expression and call the respective @@ -146,7 +140,7 @@ where /// * `binary` Parser for binary operators. /// * `operand` Parser for operands. /// * `fold` Function that evaluates a single operation and returns the result. -/// +/// /// # Example /// ```rust /// # use nom::{Err, error::{Error, ErrorKind}, IResult}; @@ -156,11 +150,11 @@ where /// use nom::sequence::delimited; /// use nom::bytes::complete::tag; /// use nom::branch::alt; -/// +/// /// fn parser(i: &str) -> IResult<&str, i64> { /// precedence( /// unary_op(1, tag("-")), -/// fail, +/// fail(), /// alt(( /// binary_op(2, Assoc::Left, tag("*")), /// binary_op(2, Assoc::Left, tag("/")), @@ -189,19 +183,19 @@ where /// assert_eq!(parser("4-(2+2)"), Ok(("", 0))); /// assert_eq!(parser("3-(2*3)+7+2*2-(2*(2+4))"), Ok(("", -4))); /// ``` -/// +/// /// # Evaluation order /// This parser reads expressions from left to right and folds operations as soon as possible. This /// behaviour is only important when using an operator grammar that allows for ambigious expressions. 
-/// +/// /// For example, the expression `-a++**b` is ambigious with the following precedence. -/// +/// /// | Operator | Position | Precedence | Associativity | /// |----------|----------|------------|---------------| /// | ** | Binary | 1 | Right | /// | - | Prefix | 2 | N/A | /// | ++ | Postfix | 3 | N/A | -/// +/// /// The expression can be parsed in two ways: `-((a++)**b)` or `((-a)++)**b`. This parser will always /// parse it as the latter because of how it evaluates expressions: /// * It reads, left-to-right, the first two operators `-a++`. @@ -220,11 +214,11 @@ pub fn precedence( where I: Clone + PartialEq, E: ParseError + FromExternalError, - F: Parser, + F: Parser, G: FnMut(Operation) -> Result, - H1: Parser, E>, - H2: Parser, E>, - H3: Parser, E>, + H1: Parser, Error = E>, + H2: Parser, Error = E>, + H3: Parser, Error = E>, Q: Ord + Copy, { move |mut i| { diff --git a/src/precedence/tests.rs b/src/precedence/tests.rs index f697730cf..04a86be0e 100644 --- a/src/precedence/tests.rs +++ b/src/precedence/tests.rs @@ -3,10 +3,10 @@ use crate::{ branch::alt, bytes::complete::tag, character::complete::digit1, - combinator::{map_res, fail}, + combinator::{fail, map_res}, + error::ErrorKind, internal::{Err, IResult}, sequence::delimited, - error::ErrorKind, }; #[cfg(feature = "alloc")] @@ -16,7 +16,7 @@ use crate::precedence::precedence; fn parser(i: &str) -> IResult<&str, i64> { precedence( unary_op(1, tag("-")), - fail, + fail(), alt(( binary_op(2, Assoc::Left, tag("*")), binary_op(2, Assoc::Left, tag("/")), @@ -50,9 +50,9 @@ fn precedence_test() { assert_eq!(parser("4-2*2"), Ok(("", 0))); assert_eq!(parser("(4-2)*2"), Ok(("", 4))); assert_eq!(parser("2*2/1"), Ok(("", 4))); - + let a = "a"; - + assert_eq!( parser(a), Err(Err::Error(error_node_position!( @@ -61,9 +61,9 @@ fn precedence_test() { error_position!(&a[..], ErrorKind::Tag) ))) ); - + let b = "3+b"; - + assert_eq!( parser(b), Err(Err::Error(error_node_position!( diff --git a/tests/expression_ast.rs 
b/tests/expression_ast.rs index 19ced17e0..0e4fbd987 100644 --- a/tests/expression_ast.rs +++ b/tests/expression_ast.rs @@ -1,12 +1,12 @@ use nom::{ branch::alt, bytes::complete::tag, - character::complete::{digit1 as digit, alphanumeric1 as alphanumeric}, - combinator::{map_res, map}, + character::complete::{alphanumeric1 as alphanumeric, digit1 as digit}, + combinator::{map, map_res}, multi::separated_list0, + precedence::{binary_op, precedence, unary_op, Assoc, Operation}, sequence::delimited, - IResult, - precedence::{precedence, Assoc, binary_op, unary_op, Operation}, + IResult, Parser, }; // Elements of the abstract syntax tree (ast) that represents an expression. @@ -29,25 +29,25 @@ pub enum Expr { // Prefix operators. enum PrefixOp { - Identity, // + - Negate, // - + Identity, // + + Negate, // - } // Postfix operators. enum PostfixOp { // The function call operator. In addition to its own representation "()" it carries additional information that we need to keep here. // Specifically the vector of expressions that make up the parameters. - Call(Vec), // () + Call(Vec), // () } // Binary operators. enum BinaryOp { - Addition, // + - Subtraction, // - - Multiplication, // * - Division, // / + Addition, // + + Subtraction, // - + Multiplication, // * + Division, // / // The ternary operator can contain a single expression. - Ternary(Expr), // ?: + Ternary(Expr), // ?: } // Parser for function calls. @@ -57,16 +57,17 @@ fn function_call(i: &str) -> IResult<&str, PostfixOp> { tag("("), // Subexpressions are evaluated by recursing back into the expression parser. separated_list0(tag(","), expression), - tag(")") + tag(")"), ), - |v: Vec| PostfixOp::Call(v) - )(i) + |v: Vec| PostfixOp::Call(v), + ) + .parse(i) } // The ternary operator is actually just a binary operator that contains another expression. So it can be // handled similarly to the function call operator except its in a binary position and can only contain // a single expression. 
-// +// // For example the expression "a IResult<&str, PostfixOp> { // subexpression is contained within the operator in the same way that the function call operator // contains subexpressions. fn ternary_operator(i: &str) -> IResult<&str, BinaryOp> { - map( - delimited( - tag("?"), - expression, - tag(":") - ), - |e: Expr| BinaryOp::Ternary(e) - )(i) + map(delimited(tag("?"), expression, tag(":")), |e: Expr| { + BinaryOp::Ternary(e) + }) + .parse(i) } // The actual expression parser . @@ -94,53 +91,59 @@ fn expression(i: &str) -> IResult<&str, Expr> { // Function calls are implemented as postfix unary operators. unary_op(1, function_call), alt(( - binary_op(3, Assoc::Left, alt(( - map(tag("*"), |_| BinaryOp::Multiplication), - map(tag("/"), |_| BinaryOp::Division), - ))), - binary_op(4, Assoc::Left, alt(( - map(tag("+"), |_| BinaryOp::Addition), - map(tag("-"), |_| BinaryOp::Subtraction), - ))), + binary_op( + 3, + Assoc::Left, + alt(( + map(tag("*"), |_| BinaryOp::Multiplication), + map(tag("/"), |_| BinaryOp::Division), + )), + ), + binary_op( + 4, + Assoc::Left, + alt(( + map(tag("+"), |_| BinaryOp::Addition), + map(tag("-"), |_| BinaryOp::Subtraction), + )), + ), // Ternary operators are just binary operators with a subexpression. binary_op(5, Assoc::Right, ternary_operator), )), alt(( - map_res(digit, - |s: &str| match s.parse::() { - Ok(s) => Ok(Expr::Num(s)), - Err(e) => Err(e), - } - ), + map_res(digit, |s: &str| match s.parse::() { + Ok(s) => Ok(Expr::Num(s)), + Err(e) => Err(e), + }), map(alphanumeric, |s: &str| Expr::Iden(s.to_string())), delimited(tag("("), expression, tag(")")), )), |op: Operation| -> Result { use nom::precedence::Operation::*; - use PrefixOp::*; - use PostfixOp::*; use BinaryOp::*; + use PostfixOp::*; + use PrefixOp::*; match op { // The identity operator (prefix +) is ignored. Prefix(Identity, e) => Ok(e), - + // Unary minus gets evaluated to the same representation as a multiplication with -1. 
Prefix(Negate, e) => Ok(Expr::Mul(Expr::Num(-1).into(), e.into())), - + // The list of parameters are taken from the operator and placed into the ast. Postfix(e, Call(p)) => Ok(Expr::Call(e.into(), p)), - + // Meaning is assigned to the expressions of the ternary operator during evaluation. // The lhs becomes the condition, the contained expression is the true case, rhs the false case. Binary(lhs, Ternary(e), rhs) => Ok(Expr::Tern(lhs.into(), e.into(), rhs.into())), - + // Raw operators get turned into their respective ast nodes. Binary(lhs, Multiplication, rhs) => Ok(Expr::Mul(lhs.into(), rhs.into())), Binary(lhs, Division, rhs) => Ok(Expr::Div(lhs.into(), rhs.into())), Binary(lhs, Addition, rhs) => Ok(Expr::Add(lhs.into(), rhs.into())), Binary(lhs, Subtraction, rhs) => Ok(Expr::Sub(lhs.into(), rhs.into())), } - } + }, )(i) } @@ -148,11 +151,19 @@ fn expression(i: &str) -> IResult<&str, Expr> { fn expression_test() { assert_eq!( expression("-2*max(2,3)-2").map(|(i, x)| (i, format!("{:?}", x))), - Ok(("", String::from("Sub(Mul(Mul(Num(-1), Num(2)), Call(Iden(\"max\"), [Num(2), Num(3)])), Num(2))"))) + Ok(( + "", + String::from("Sub(Mul(Mul(Num(-1), Num(2)), Call(Iden(\"max\"), [Num(2), Num(3)])), Num(2))") + )) + ); + + assert_eq!( + expression("a?2+c:-2*2").map(|(i, x)| (i, format!("{:?}", x))), + Ok(( + "", + String::from( + "Tern(Iden(\"a\"), Add(Num(2), Iden(\"c\")), Mul(Mul(Num(-1), Num(2)), Num(2)))" + ) + )) ); - - assert_eq!( - expression("a?2+c:-2*2").map(|(i, x)| (i, format!("{:?}", x))), - Ok(("", String::from("Tern(Iden(\"a\"), Add(Num(2), Iden(\"c\")), Mul(Mul(Num(-1), Num(2)), Num(2)))"))) - ); } From cb3b5b8ebbbca2efe81ae4089541bab322bc3bfe Mon Sep 17 00:00:00 2001 From: Geoffroy Couprie Date: Sun, 8 Dec 2024 17:29:34 +0100 Subject: [PATCH 5/8] move precedence parsing to nom-language --- nom-language/src/lib.rs | 1 + {src => nom-language/src}/precedence/mod.rs | 8 ++------ {src => nom-language/src}/precedence/tests.rs | 8 +++----- src/lib.rs | 2 -- 
tests/expression_ast.rs | 4 ++-- 5 files changed, 8 insertions(+), 15 deletions(-) rename {src => nom-language/src}/precedence/mod.rs (97%) rename {src => nom-language/src}/precedence/tests.rs (93%) diff --git a/nom-language/src/lib.rs b/nom-language/src/lib.rs index 01b2862e6..c018f3198 100644 --- a/nom-language/src/lib.rs +++ b/nom-language/src/lib.rs @@ -6,3 +6,4 @@ //! at language parsing. pub mod error; +pub mod precedence; diff --git a/src/precedence/mod.rs b/nom-language/src/precedence/mod.rs similarity index 97% rename from src/precedence/mod.rs rename to nom-language/src/precedence/mod.rs index 22c9cc1ef..d3f377987 100644 --- a/src/precedence/mod.rs +++ b/nom-language/src/precedence/mod.rs @@ -1,13 +1,10 @@ //! Combinators to parse expressions with operator precedence. -#![cfg(feature = "alloc")] -#![cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] #[cfg(test)] mod tests; -use crate::error::{ErrorKind, FromExternalError, ParseError}; -use crate::lib::std::vec::Vec; -use crate::{Err, IResult, Parser}; +use nom::error::{ErrorKind, FromExternalError, ParseError}; +use nom::{Err, IResult, Parser}; /// An unary operator. pub struct Unary { @@ -203,7 +200,6 @@ where /// * It then reads the remaining input and evaluates the increment next in order to preserve its /// position in the expression \ /// `((-a)++)**b`. 
-#[cfg_attr(feature = "docsrs", doc(cfg(feature = "alloc")))] pub fn precedence( mut prefix: H1, mut postfix: H2, diff --git a/src/precedence/tests.rs b/nom-language/src/precedence/tests.rs similarity index 93% rename from src/precedence/tests.rs rename to nom-language/src/precedence/tests.rs index 04a86be0e..fc8bf63a9 100644 --- a/src/precedence/tests.rs +++ b/nom-language/src/precedence/tests.rs @@ -1,18 +1,17 @@ use crate::precedence::{binary_op, unary_op, Assoc, Operation}; -use crate::{ +use nom::{ branch::alt, bytes::complete::tag, character::complete::digit1, combinator::{fail, map_res}, error::ErrorKind, - internal::{Err, IResult}, + error_node_position, error_position, sequence::delimited, + Err, IResult, }; -#[cfg(feature = "alloc")] use crate::precedence::precedence; -#[cfg(feature = "alloc")] fn parser(i: &str) -> IResult<&str, i64> { precedence( unary_op(1, tag("-")), @@ -42,7 +41,6 @@ fn parser(i: &str) -> IResult<&str, i64> { } #[test] -#[cfg(feature = "alloc")] fn precedence_test() { assert_eq!(parser("3"), Ok(("", 3))); assert_eq!(parser("-3"), Ok(("", -3))); diff --git a/src/lib.rs b/src/lib.rs index db4c8703b..c82715a3b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -451,8 +451,6 @@ pub mod bytes; pub mod character; -pub mod precedence; - mod str; pub mod number; diff --git a/tests/expression_ast.rs b/tests/expression_ast.rs index 0e4fbd987..0d04c9bb0 100644 --- a/tests/expression_ast.rs +++ b/tests/expression_ast.rs @@ -4,10 +4,10 @@ use nom::{ character::complete::{alphanumeric1 as alphanumeric, digit1 as digit}, combinator::{map, map_res}, multi::separated_list0, - precedence::{binary_op, precedence, unary_op, Assoc, Operation}, sequence::delimited, IResult, Parser, }; +use nom_language::precedence::{binary_op, precedence, unary_op, Assoc, Operation}; // Elements of the abstract syntax tree (ast) that represents an expression. 
#[derive(Debug)] @@ -119,7 +119,7 @@ fn expression(i: &str) -> IResult<&str, Expr> { delimited(tag("("), expression, tag(")")), )), |op: Operation| -> Result { - use nom::precedence::Operation::*; + use nom_language::precedence::Operation::*; use BinaryOp::*; use PostfixOp::*; use PrefixOp::*; From 9b0e18cf5a75964a3d4a841292c0aacf6a513c39 Mon Sep 17 00:00:00 2001 From: Rodrigo Rivas Costa Date: Sun, 8 Dec 2024 17:37:25 +0100 Subject: [PATCH 6/8] Implement new parser `left_assoc`. (#1775) Co-authored-by: Geoffroy Couprie --- src/multi/mod.rs | 135 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 135 insertions(+) diff --git a/src/multi/mod.rs b/src/multi/mod.rs index d78611f99..4ca65b7e9 100644 --- a/src/multi/mod.rs +++ b/src/multi/mod.rs @@ -1868,3 +1868,138 @@ where Ok((input, acc)) } } + +/// Applies a parser multiple times separated by another parser. +/// +/// It is similar to [`separated_list1`][crate::multi::separated_list1] but instead of collecting +/// into a vector, you have a callback to build the output. +/// +/// In a LALR grammar a left recursive operator is usually built with a rule syntax such as: +/// * A := A op B | B +/// +/// If you try to parse that wth [`alt`][crate::branch::alt] it will fail with a stack overflow +/// because the recusion is unlimited. This function solves this problem by converting the recusion +/// into an iteration. +/// +/// Compare with a right recursive operator, that in LALR would be: +/// * A := B op A | B +/// Or equivalently: +/// * A := B (op A)? +/// +/// That can be written in `nom` trivially. +/// +/// This stops when either parser returns [`err::error`] and returns the last built value. to instead chain an error up, see +/// [`cut`][crate::combinator::cut]. +/// +/// # Arguments +/// * `child` The parser to apply. +/// * `operator` Parses the operator between argument. +/// * `init` A function returning the initial value. 
+/// * `fold` The function that combines a result of `f` with +/// the current accumulator. +/// ```rust +/// # #[macro_use] extern crate nom; +/// # use nom::{Err, error::ErrorKind, Needed, IResult, Parser}; +/// use nom::multi::left_assoc; +/// use nom::branch::alt; +/// use nom::sequence::delimited; +/// use nom::character::complete::{char, digit1}; +/// +/// fn add(i: &str) -> IResult<&str, String> { +/// left_assoc(mult, char('+'), |a, o, b| format!("{o}{a}{b}")).parse(i) +/// } +/// fn mult(i: &str) -> IResult<&str, String> { +/// left_assoc(single, char('*'), |a, o, b| format!("{o}{a}{b}")).parse(i) +/// } +/// fn single(i: &str) -> IResult<&str, String> { +/// alt(( +/// digit1.map(|x: &str| x.to_string()), +/// delimited(char('('), add, char(')')) +/// )).parse(i) +/// } +/// +/// assert_eq!(single("(1+2*3)"), Ok(("", String::from("+1*23")))); +/// assert_eq!(single("((1+2)*3)"), Ok(("", String::from("*+123")))); +/// assert_eq!(single("(1*2+3)"), Ok(("", String::from("+*123")))); +/// assert_eq!(single("((1+2*3)+4)"), Ok(("", String::from("++1*234")))); +/// assert_eq!(single("(1+(2*3+4))"), Ok(("", String::from("+1+*234")))); +/// ``` +pub fn left_assoc( + child: F, + operator: G, + builder: B, +) -> impl Parser +where + I: Clone + Input, + E: ParseError, + F: Parser, + G: Parser, + B: FnMut(O, OP, O) -> O, +{ + LeftAssoc { + child, + operator, + builder, + } +} + +/// Parser implementation for the [separated_list1] combinator +pub struct LeftAssoc { + child: F, + operator: G, + builder: B, +} + +impl Parser for LeftAssoc +where + I: Clone + Input, + E: ParseError, + F: Parser, + G: Parser, + B: FnMut(O, OP, O) -> O, +{ + type Output = O; + type Error = E; + + fn process( + &mut self, + mut i: I, + ) -> crate::PResult { + let (i1, mut res) = self.child.process::(i)?; + i = i1; + + loop { + let len = i.input_len(); + match self + .operator + .process::>(i.clone()) + { + Err(Err::Error(_)) => return Ok((i, res)), + Err(Err::Failure(e)) => return 
Err(Err::Failure(e)), + Err(Err::Incomplete(e)) => return Err(Err::Incomplete(e)), + Ok((i1, op)) => { + match self + .child + .process::>(i1.clone()) + { + Err(Err::Error(_)) => return Ok((i, res)), + Err(Err::Failure(e)) => return Err(Err::Failure(e)), + Err(Err::Incomplete(e)) => return Err(Err::Incomplete(e)), + Ok((i2, rhs)) => { + // infinite loop check: the parser must always consume + if i2.input_len() == len { + return Err(Err::Error(OM::Error::bind(|| { + >::Error::from_error_kind(i, ErrorKind::SeparatedList) + }))); + } + // there is no combine() with 3 arguments, fake it with a tuple and two calls + let op_rhs = OM::Output::combine(op, rhs, |op, rhs| (op, rhs)); + res = OM::Output::combine(res, op_rhs, |lhs, (op, rhs)| (self.builder)(lhs, op, rhs)); + i = i2; + } + } + } + } + } + } +} From bcc1b0cb64bcbf82ab3f35e32eec1429129e5d02 Mon Sep 17 00:00:00 2001 From: Geoffroy Couprie Date: Sun, 8 Dec 2024 17:39:37 +0100 Subject: [PATCH 7/8] move the left_assoc combinator to nom-language --- nom-language/src/precedence/mod.rs | 137 ++++++++++++++++++++++++++++- src/multi/mod.rs | 135 ---------------------------- 2 files changed, 136 insertions(+), 136 deletions(-) diff --git a/nom-language/src/precedence/mod.rs b/nom-language/src/precedence/mod.rs index d3f377987..6dc467c30 100644 --- a/nom-language/src/precedence/mod.rs +++ b/nom-language/src/precedence/mod.rs @@ -4,7 +4,7 @@ mod tests; use nom::error::{ErrorKind, FromExternalError, ParseError}; -use nom::{Err, IResult, Parser}; +use nom::{Check, Err, IResult, Input, Mode, OutputM, OutputMode, Parser}; /// An unary operator. pub struct Unary { @@ -367,3 +367,138 @@ where } } } + +/// Applies a parser multiple times separated by another parser. +/// +/// It is similar to [`separated_list1`][crate::multi::separated_list1] but instead of collecting +/// into a vector, you have a callback to build the output. 
+/// +/// In a LALR grammar a left recursive operator is usually built with a rule syntax such as: +/// * A := A op B | B +/// +/// If you try to parse that wth [`alt`][crate::branch::alt] it will fail with a stack overflow +/// because the recusion is unlimited. This function solves this problem by converting the recusion +/// into an iteration. +/// +/// Compare with a right recursive operator, that in LALR would be: +/// * A := B op A | B +/// Or equivalently: +/// * A := B (op A)? +/// +/// That can be written in `nom` trivially. +/// +/// This stops when either parser returns [`err::error`] and returns the last built value. to instead chain an error up, see +/// [`cut`][crate::combinator::cut]. +/// +/// # Arguments +/// * `child` The parser to apply. +/// * `operator` Parses the operator between argument. +/// * `init` A function returning the initial value. +/// * `fold` The function that combines a result of `f` with +/// the current accumulator. +/// ```rust +/// # #[macro_use] extern crate nom; +/// # use nom::{Err, error::ErrorKind, Needed, IResult, Parser}; +/// use nom::multi::left_assoc; +/// use nom::branch::alt; +/// use nom::sequence::delimited; +/// use nom::character::complete::{char, digit1}; +/// +/// fn add(i: &str) -> IResult<&str, String> { +/// left_assoc(mult, char('+'), |a, o, b| format!("{o}{a}{b}")).parse(i) +/// } +/// fn mult(i: &str) -> IResult<&str, String> { +/// left_assoc(single, char('*'), |a, o, b| format!("{o}{a}{b}")).parse(i) +/// } +/// fn single(i: &str) -> IResult<&str, String> { +/// alt(( +/// digit1.map(|x: &str| x.to_string()), +/// delimited(char('('), add, char(')')) +/// )).parse(i) +/// } +/// +/// assert_eq!(single("(1+2*3)"), Ok(("", String::from("+1*23")))); +/// assert_eq!(single("((1+2)*3)"), Ok(("", String::from("*+123")))); +/// assert_eq!(single("(1*2+3)"), Ok(("", String::from("+*123")))); +/// assert_eq!(single("((1+2*3)+4)"), Ok(("", String::from("++1*234")))); +/// assert_eq!(single("(1+(2*3+4))"), 
Ok(("", String::from("+1+*234")))); +/// ``` +pub fn left_assoc( + child: F, + operator: G, + builder: B, +) -> impl Parser +where + I: Clone + Input, + E: ParseError, + F: Parser, + G: Parser, + B: FnMut(O, OP, O) -> O, +{ + LeftAssoc { + child, + operator, + builder, + } +} + +/// Parser implementation for the [separated_list1] combinator +pub struct LeftAssoc { + child: F, + operator: G, + builder: B, +} + +impl Parser for LeftAssoc +where + I: Clone + Input, + E: ParseError, + F: Parser, + G: Parser, + B: FnMut(O, OP, O) -> O, +{ + type Output = O; + type Error = E; + + fn process( + &mut self, + mut i: I, + ) -> nom::PResult { + let (i1, mut res) = self.child.process::(i)?; + i = i1; + + loop { + let len = i.input_len(); + match self + .operator + .process::>(i.clone()) + { + Err(Err::Error(_)) => return Ok((i, res)), + Err(Err::Failure(e)) => return Err(Err::Failure(e)), + Err(Err::Incomplete(e)) => return Err(Err::Incomplete(e)), + Ok((i1, op)) => { + match self + .child + .process::>(i1.clone()) + { + Err(Err::Error(_)) => return Ok((i, res)), + Err(Err::Failure(e)) => return Err(Err::Failure(e)), + Err(Err::Incomplete(e)) => return Err(Err::Incomplete(e)), + Ok((i2, rhs)) => { + // infinite loop check: the parser must always consume + if i2.input_len() == len { + return Err(Err::Error(OM::Error::bind(|| { + >::Error::from_error_kind(i, ErrorKind::SeparatedList) + }))); + } + // there is no combine() with 3 arguments, fake it with a tuple and two calls + let op_rhs = OM::Output::combine(op, rhs, |op, rhs| (op, rhs)); + res = OM::Output::combine(res, op_rhs, |lhs, (op, rhs)| (self.builder)(lhs, op, rhs)); + i = i2; + } + } + } + } + } + } +} diff --git a/src/multi/mod.rs b/src/multi/mod.rs index 4ca65b7e9..d78611f99 100644 --- a/src/multi/mod.rs +++ b/src/multi/mod.rs @@ -1868,138 +1868,3 @@ where Ok((input, acc)) } } - -/// Applies a parser multiple times separated by another parser. 
-/// -/// It is similar to [`separated_list1`][crate::multi::separated_list1] but instead of collecting -/// into a vector, you have a callback to build the output. -/// -/// In a LALR grammar a left recursive operator is usually built with a rule syntax such as: -/// * A := A op B | B -/// -/// If you try to parse that wth [`alt`][crate::branch::alt] it will fail with a stack overflow -/// because the recusion is unlimited. This function solves this problem by converting the recusion -/// into an iteration. -/// -/// Compare with a right recursive operator, that in LALR would be: -/// * A := B op A | B -/// Or equivalently: -/// * A := B (op A)? -/// -/// That can be written in `nom` trivially. -/// -/// This stops when either parser returns [`err::error`] and returns the last built value. to instead chain an error up, see -/// [`cut`][crate::combinator::cut]. -/// -/// # Arguments -/// * `child` The parser to apply. -/// * `operator` Parses the operator between argument. -/// * `init` A function returning the initial value. -/// * `fold` The function that combines a result of `f` with -/// the current accumulator. 
-/// ```rust -/// # #[macro_use] extern crate nom; -/// # use nom::{Err, error::ErrorKind, Needed, IResult, Parser}; -/// use nom::multi::left_assoc; -/// use nom::branch::alt; -/// use nom::sequence::delimited; -/// use nom::character::complete::{char, digit1}; -/// -/// fn add(i: &str) -> IResult<&str, String> { -/// left_assoc(mult, char('+'), |a, o, b| format!("{o}{a}{b}")).parse(i) -/// } -/// fn mult(i: &str) -> IResult<&str, String> { -/// left_assoc(single, char('*'), |a, o, b| format!("{o}{a}{b}")).parse(i) -/// } -/// fn single(i: &str) -> IResult<&str, String> { -/// alt(( -/// digit1.map(|x: &str| x.to_string()), -/// delimited(char('('), add, char(')')) -/// )).parse(i) -/// } -/// -/// assert_eq!(single("(1+2*3)"), Ok(("", String::from("+1*23")))); -/// assert_eq!(single("((1+2)*3)"), Ok(("", String::from("*+123")))); -/// assert_eq!(single("(1*2+3)"), Ok(("", String::from("+*123")))); -/// assert_eq!(single("((1+2*3)+4)"), Ok(("", String::from("++1*234")))); -/// assert_eq!(single("(1+(2*3+4))"), Ok(("", String::from("+1+*234")))); -/// ``` -pub fn left_assoc( - child: F, - operator: G, - builder: B, -) -> impl Parser -where - I: Clone + Input, - E: ParseError, - F: Parser, - G: Parser, - B: FnMut(O, OP, O) -> O, -{ - LeftAssoc { - child, - operator, - builder, - } -} - -/// Parser implementation for the [separated_list1] combinator -pub struct LeftAssoc { - child: F, - operator: G, - builder: B, -} - -impl Parser for LeftAssoc -where - I: Clone + Input, - E: ParseError, - F: Parser, - G: Parser, - B: FnMut(O, OP, O) -> O, -{ - type Output = O; - type Error = E; - - fn process( - &mut self, - mut i: I, - ) -> crate::PResult { - let (i1, mut res) = self.child.process::(i)?; - i = i1; - - loop { - let len = i.input_len(); - match self - .operator - .process::>(i.clone()) - { - Err(Err::Error(_)) => return Ok((i, res)), - Err(Err::Failure(e)) => return Err(Err::Failure(e)), - Err(Err::Incomplete(e)) => return Err(Err::Incomplete(e)), - Ok((i1, op)) => { 
- match self - .child - .process::>(i1.clone()) - { - Err(Err::Error(_)) => return Ok((i, res)), - Err(Err::Failure(e)) => return Err(Err::Failure(e)), - Err(Err::Incomplete(e)) => return Err(Err::Incomplete(e)), - Ok((i2, rhs)) => { - // infinite loop check: the parser must always consume - if i2.input_len() == len { - return Err(Err::Error(OM::Error::bind(|| { - >::Error::from_error_kind(i, ErrorKind::SeparatedList) - }))); - } - // there is no combine() with 3 arguments, fake it with a tuple and two calls - let op_rhs = OM::Output::combine(op, rhs, |op, rhs| (op, rhs)); - res = OM::Output::combine(res, op_rhs, |lhs, (op, rhs)| (self.builder)(lhs, op, rhs)); - i = i2; - } - } - } - } - } - } -} From 6a25312dbf4df8fe0ad1f1dc5de19843139d381e Mon Sep 17 00:00:00 2001 From: Geoffroy Couprie Date: Sun, 8 Dec 2024 17:44:16 +0100 Subject: [PATCH 8/8] fix nom-language tests --- nom-language/src/precedence/mod.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/nom-language/src/precedence/mod.rs b/nom-language/src/precedence/mod.rs index 6dc467c30..3cfc262ab 100644 --- a/nom-language/src/precedence/mod.rs +++ b/nom-language/src/precedence/mod.rs @@ -141,7 +141,7 @@ where /// # Example /// ```rust /// # use nom::{Err, error::{Error, ErrorKind}, IResult}; -/// use nom::precedence::{precedence, unary_op, binary_op, Assoc, Operation}; +/// use nom_language::precedence::{precedence, unary_op, binary_op, Assoc, Operation}; /// use nom::character::complete::digit1; /// use nom::combinator::{map_res, fail}; /// use nom::sequence::delimited; @@ -163,7 +163,7 @@ where /// delimited(tag("("), parser, tag(")")), /// )), /// |op: Operation<&str, &str, &str, i64>| { -/// use nom::precedence::Operation::*; +/// use nom_language::precedence::Operation::*; /// match op { /// Prefix("-", o) => Ok(-o), /// Binary(lhs, "*", rhs) => Ok(lhs * rhs), @@ -399,7 +399,7 @@ where /// ```rust /// # #[macro_use] extern crate nom; /// # use nom::{Err, error::ErrorKind, Needed, 
IResult, Parser}; -/// use nom::multi::left_assoc; +/// use nom_language::precedence::left_assoc; /// use nom::branch::alt; /// use nom::sequence::delimited; /// use nom::character::complete::{char, digit1};