From bcc1b0cb64bcbf82ab3f35e32eec1429129e5d02 Mon Sep 17 00:00:00 2001 From: Geoffroy Couprie Date: Sun, 8 Dec 2024 17:39:37 +0100 Subject: [PATCH] move the left_assoc combinator to nom-language --- nom-language/src/precedence/mod.rs | 137 ++++++++++++++++++++++++++++- src/multi/mod.rs | 135 ---------------------------- 2 files changed, 136 insertions(+), 136 deletions(-) diff --git a/nom-language/src/precedence/mod.rs b/nom-language/src/precedence/mod.rs index d3f37798..6dc467c3 100644 --- a/nom-language/src/precedence/mod.rs +++ b/nom-language/src/precedence/mod.rs @@ -4,7 +4,7 @@ mod tests; use nom::error::{ErrorKind, FromExternalError, ParseError}; -use nom::{Err, IResult, Parser}; +use nom::{Check, Err, IResult, Input, Mode, OutputM, OutputMode, Parser}; /// An unary operator. pub struct Unary { @@ -367,3 +367,138 @@ where } } } + +/// Applies a parser multiple times separated by another parser. +/// +/// It is similar to [`separated_list1`][nom::multi::separated_list1] but instead of collecting +/// into a vector, you have a callback to build the output. +/// +/// In a LALR grammar a left recursive operator is usually built with a rule syntax such as: +/// * A := A op B | B +/// +/// If you try to parse that with [`alt`][nom::branch::alt] it will fail with a stack overflow +/// because the recursion is unlimited. This function solves this problem by converting the recursion +/// into an iteration. +/// +/// Compare with a right recursive operator, that in LALR would be: +/// * A := B op A | B +/// Or equivalently: +/// * A := B (op A)? +/// +/// That can be written in `nom` trivially. +/// +/// This stops when either parser returns [`Err::Error`] and returns the last built value. To instead chain an error up, see +/// [`cut`][nom::combinator::cut]. +/// +/// # Arguments +/// * `child` The parser to apply. +/// * `operator` Parses the operator between arguments. +/// * The initial value is the output of the first `child` parse. 
+/// * `builder` Combines the current value, an operator output and the next `child` output +/// into the new current value. +/// ```rust +/// # #[macro_use] extern crate nom; +/// # use nom::{Err, error::ErrorKind, Needed, IResult, Parser}; +/// use nom_language::precedence::left_assoc; +/// use nom::branch::alt; +/// use nom::sequence::delimited; +/// use nom::character::complete::{char, digit1}; +/// +/// fn add(i: &str) -> IResult<&str, String> { +/// left_assoc(mult, char('+'), |a, o, b| format!("{o}{a}{b}")).parse(i) +/// } +/// fn mult(i: &str) -> IResult<&str, String> { +/// left_assoc(single, char('*'), |a, o, b| format!("{o}{a}{b}")).parse(i) +/// } +/// fn single(i: &str) -> IResult<&str, String> { +/// alt(( +/// digit1.map(|x: &str| x.to_string()), +/// delimited(char('('), add, char(')')) +/// )).parse(i) +/// } +/// +/// assert_eq!(single("(1+2*3)"), Ok(("", String::from("+1*23")))); +/// assert_eq!(single("((1+2)*3)"), Ok(("", String::from("*+123")))); +/// assert_eq!(single("(1*2+3)"), Ok(("", String::from("+*123")))); +/// assert_eq!(single("((1+2*3)+4)"), Ok(("", String::from("++1*234")))); +/// assert_eq!(single("(1+(2*3+4))"), Ok(("", String::from("+1+*234")))); +/// ``` +pub fn left_assoc( + child: F, + operator: G, + builder: B, +) -> impl Parser +where + I: Clone + Input, + E: ParseError, + F: Parser, + G: Parser, + B: FnMut(O, OP, O) -> O, +{ + LeftAssoc { + child, + operator, + builder, + } +} + +/// Parser implementation for the [left_assoc] combinator +pub struct LeftAssoc { + child: F, + operator: G, + builder: B, +} + +impl Parser for LeftAssoc +where + I: Clone + Input, + E: ParseError, + F: Parser, + G: Parser, + B: FnMut(O, OP, O) -> O, +{ + type Output = O; + type Error = E; + + fn process( + &mut self, + mut i: I, + ) -> nom::PResult { + let (i1, mut res) = self.child.process::(i)?; + i = i1; + + loop { + let len = i.input_len(); + match self + .operator + .process::>(i.clone()) + { + Err(Err::Error(_)) => return Ok((i, res)), + Err(Err::Failure(e)) => return 
Err(Err::Failure(e)), + Err(Err::Incomplete(e)) => return Err(Err::Incomplete(e)), + Ok((i1, op)) => { + match self + .child + .process::>(i1.clone()) + { + Err(Err::Error(_)) => return Ok((i, res)), + Err(Err::Failure(e)) => return Err(Err::Failure(e)), + Err(Err::Incomplete(e)) => return Err(Err::Incomplete(e)), + Ok((i2, rhs)) => { + // infinite loop check: the parser must always consume + if i2.input_len() == len { + return Err(Err::Error(OM::Error::bind(|| { + >::Error::from_error_kind(i, ErrorKind::SeparatedList) + }))); + } + // there is no combine() with 3 arguments, fake it with a tuple and two calls + let op_rhs = OM::Output::combine(op, rhs, |op, rhs| (op, rhs)); + res = OM::Output::combine(res, op_rhs, |lhs, (op, rhs)| (self.builder)(lhs, op, rhs)); + i = i2; + } + } + } + } + } + } +} diff --git a/src/multi/mod.rs b/src/multi/mod.rs index 4ca65b7e..d78611f9 100644 --- a/src/multi/mod.rs +++ b/src/multi/mod.rs @@ -1868,138 +1868,3 @@ where Ok((input, acc)) } } - -/// Applies a parser multiple times separated by another parser. -/// -/// It is similar to [`separated_list1`][crate::multi::separated_list1] but instead of collecting -/// into a vector, you have a callback to build the output. -/// -/// In a LALR grammar a left recursive operator is usually built with a rule syntax such as: -/// * A := A op B | B -/// -/// If you try to parse that wth [`alt`][crate::branch::alt] it will fail with a stack overflow -/// because the recusion is unlimited. This function solves this problem by converting the recusion -/// into an iteration. -/// -/// Compare with a right recursive operator, that in LALR would be: -/// * A := B op A | B -/// Or equivalently: -/// * A := B (op A)? -/// -/// That can be written in `nom` trivially. -/// -/// This stops when either parser returns [`err::error`] and returns the last built value. to instead chain an error up, see -/// [`cut`][crate::combinator::cut]. -/// -/// # Arguments -/// * `child` The parser to apply. 
-/// * `operator` Parses the operator between argument. -/// * `init` A function returning the initial value. -/// * `fold` The function that combines a result of `f` with -/// the current accumulator. -/// ```rust -/// # #[macro_use] extern crate nom; -/// # use nom::{Err, error::ErrorKind, Needed, IResult, Parser}; -/// use nom::multi::left_assoc; -/// use nom::branch::alt; -/// use nom::sequence::delimited; -/// use nom::character::complete::{char, digit1}; -/// -/// fn add(i: &str) -> IResult<&str, String> { -/// left_assoc(mult, char('+'), |a, o, b| format!("{o}{a}{b}")).parse(i) -/// } -/// fn mult(i: &str) -> IResult<&str, String> { -/// left_assoc(single, char('*'), |a, o, b| format!("{o}{a}{b}")).parse(i) -/// } -/// fn single(i: &str) -> IResult<&str, String> { -/// alt(( -/// digit1.map(|x: &str| x.to_string()), -/// delimited(char('('), add, char(')')) -/// )).parse(i) -/// } -/// -/// assert_eq!(single("(1+2*3)"), Ok(("", String::from("+1*23")))); -/// assert_eq!(single("((1+2)*3)"), Ok(("", String::from("*+123")))); -/// assert_eq!(single("(1*2+3)"), Ok(("", String::from("+*123")))); -/// assert_eq!(single("((1+2*3)+4)"), Ok(("", String::from("++1*234")))); -/// assert_eq!(single("(1+(2*3+4))"), Ok(("", String::from("+1+*234")))); -/// ``` -pub fn left_assoc( - child: F, - operator: G, - builder: B, -) -> impl Parser -where - I: Clone + Input, - E: ParseError, - F: Parser, - G: Parser, - B: FnMut(O, OP, O) -> O, -{ - LeftAssoc { - child, - operator, - builder, - } -} - -/// Parser implementation for the [separated_list1] combinator -pub struct LeftAssoc { - child: F, - operator: G, - builder: B, -} - -impl Parser for LeftAssoc -where - I: Clone + Input, - E: ParseError, - F: Parser, - G: Parser, - B: FnMut(O, OP, O) -> O, -{ - type Output = O; - type Error = E; - - fn process( - &mut self, - mut i: I, - ) -> crate::PResult { - let (i1, mut res) = self.child.process::(i)?; - i = i1; - - loop { - let len = i.input_len(); - match self - .operator - 
.process::>(i.clone()) - { - Err(Err::Error(_)) => return Ok((i, res)), - Err(Err::Failure(e)) => return Err(Err::Failure(e)), - Err(Err::Incomplete(e)) => return Err(Err::Incomplete(e)), - Ok((i1, op)) => { - match self - .child - .process::>(i1.clone()) - { - Err(Err::Error(_)) => return Ok((i, res)), - Err(Err::Failure(e)) => return Err(Err::Failure(e)), - Err(Err::Incomplete(e)) => return Err(Err::Incomplete(e)), - Ok((i2, rhs)) => { - // infinite loop check: the parser must always consume - if i2.input_len() == len { - return Err(Err::Error(OM::Error::bind(|| { - >::Error::from_error_kind(i, ErrorKind::SeparatedList) - }))); - } - // there is no combine() with 3 arguments, fake it with a tuple and two calls - let op_rhs = OM::Output::combine(op, rhs, |op, rhs| (op, rhs)); - res = OM::Output::combine(res, op_rhs, |lhs, (op, rhs)| (self.builder)(lhs, op, rhs)); - i = i2; - } - } - } - } - } - } -}