Merge branch 'main' into clippy

rust-bakery · Oct 21, 2023 · 1fe2f16 · 1fe2f16
2 parents 5cb8623 + d4318f6
commit 1fe2f16
Show file tree

Hide file tree

Showing 9 changed files with 239 additions and 8 deletions.
diff --git a/README.md b/README.md
@@ -140,7 +140,7 @@ formats such as JSON, nom can manage it, and provides you with useful tools:
 
 Example projects:
 
-- [HTTP proxy](https://github.com/sozu-proxy/sozu/tree/main/lib/src/protocol/http/parser)
+- [HTTP proxy](https://github.com/sozu-proxy/sozu/blob/main/lib/src/protocol/h2/parser.rs)
 - [TOML parser](https://github.com/joelself/tomllib)
 
 ### Programming language parsers
@@ -175,7 +175,7 @@ It allows you to build powerful, deterministic state machines for your protocols
 
 Example projects:
 
-- [HTTP proxy](https://github.com/sozu-proxy/sozu/tree/main/lib/src/protocol/http/parser)
+- [HTTP proxy](https://github.com/sozu-proxy/sozu/blob/main/lib/src/protocol/h2/parser.rs)
 - [Using nom with generators](https://github.com/rust-bakery/generator_nom)
 
 ## Parser combinators
@@ -253,6 +253,7 @@ Here is a (non exhaustive) list of known projects using nom:
 
 - Text file formats: [Ceph Crush](https://github.com/cholcombe973/crushtool),
 [Cronenberg](https://github.com/ayrat555/cronenberg),
+[Email](https://github.com/deuxfleurs-org/eml-codec),
 [XFS Runtime Stats](https://github.com/ChrisMacNaughton/xfs-rs),
 [CSV](https://github.com/GuillaumeGomez/csv-parser),
 [FASTA](https://github.com/TianyiShi2001/nom-fasta),
@@ -264,7 +265,8 @@ Here is a (non exhaustive) list of known projects using nom:
 [PDB](https://github.com/TianyiShi2001/nom-pdb),
 [proto files](https://github.com/tafia/protobuf-parser),
 [Fountain screenplay markup](https://github.com/adamchalmers/fountain-rs),
-[vimwiki](https://github.com/chipsenkbeil/vimwiki-rs/tree/master/vimwiki) & [vimwiki_macros](https://github.com/chipsenkbeil/vimwiki-rs/tree/master/vimwiki_macros)
+[vimwiki](https://github.com/chipsenkbeil/vimwiki-rs/tree/master/vimwiki) & [vimwiki_macros](https://github.com/chipsenkbeil/vimwiki-rs/tree/master/vimwiki_macros),
+[Kconfig language](https://github.com/Mcdostone/nom-kconfig)
 - Programming languages:
 [PHP](https://github.com/tagua-vm/parser),
 [Basic Calculator](https://github.com/balajisivaraman/basic_calculator_rs),
@@ -278,7 +280,8 @@ Here is a (non exhaustive) list of known projects using nom:
 [CSML](https://github.com/CSML-by-Clevy/csml-engine/tree/dev/csml_interpreter),
 [Wasm](https://github.com/fabrizio-m/wasm-nom),
 [Pseudocode](https://github.com/Gungy2/pseudocod),
-[Filter for MeiliSearch](https://github.com/meilisearch/meilisearch)
+[Filter for MeiliSearch](https://github.com/meilisearch/meilisearch),
+[PotterScript](https://github.com/fmiras/potterscript)
 - Interface definition formats: [Thrift](https://github.com/thehydroimpulse/thrust)
 - Audio, video and image formats:
 [GIF](https://github.com/Geal/gif.rs),
@@ -299,7 +302,7 @@ Here is a (non exhaustive) list of known projects using nom:
 [DHCP](https://github.com/rusticata/dhcp-parser),
 [HTTP](https://github.com/sozu-proxy/sozu/tree/main/lib/src/protocol/http),
 [URI](https://github.com/santifa/rrp/blob/master/src/uri.rs),
-[IMAP](https://github.com/djc/tokio-imap),
+[IMAP](https://github.com/djc/tokio-imap) ([alt](https://github.com/duesee/imap-codec)),
 [IRC](https://github.com/Detegr/RBot-parser),
 [Pcap-NG](https://github.com/richo/pcapng-rs),
 [Pcap](https://github.com/ithinuel/pcap-rs),
@@ -313,6 +316,7 @@ Here is a (non exhaustive) list of known projects using nom:
 [IPFIX / Netflow v10](https://github.com/dominotree/rs-ipfix),
 [GTP](https://github.com/fuerstenau/gorrosion-gtp),
 [SIP](https://github.com/kurotych/sipcore/tree/master/crates/sipmsg),
+[SMTP](https://github.com/Ekleog/kannader),
 [Prometheus](https://github.com/vectordotdev/vector/blob/master/lib/prometheus-parser/src/line.rs)
 - Language specifications:
 [BNF](https://github.com/shnewto/bnf)

diff --git a/doc/choosing_a_combinator.md b/doc/choosing_a_combinator.md
@@ -131,6 +131,7 @@ Use these functions with a combinator like `take_while`:
 - [`is_digit`](https://docs.rs/nom/latest/nom/character/fn.is_digit.html): Tests if byte is ASCII digit: `[0-9]`
 - [`is_hex_digit`](https://docs.rs/nom/latest/nom/character/fn.is_hex_digit.html): Tests if byte is ASCII hex digit: `[0-9A-Fa-f]`
 - [`is_oct_digit`](https://docs.rs/nom/latest/nom/character/fn.is_oct_digit.html): Tests if byte is ASCII octal digit: `[0-7]`
+- [`is_bin_digit`](https://docs.rs/nom/latest/nom/character/fn.is_bin_digit.html): Tests if byte is ASCII binary digit: `[0-1]`
 - [`is_space`](https://docs.rs/nom/latest/nom/character/fn.is_space.html): Tests if byte is ASCII space or tab: `[ \t]`
 - [`is_newline`](https://docs.rs/nom/latest/nom/character/fn.is_newline.html): Tests if byte is ASCII newline: `[\n]`
 
@@ -150,6 +151,7 @@ Alternatively there are ready to use functions:
 - [`newline`](https://docs.rs/nom/latest/nom/character/complete/fn.newline.html): Matches a newline character `\n`
 - [`not_line_ending`](https://docs.rs/nom/latest/nom/character/complete/fn.not_line_ending.html): Recognizes a string of any char except `\r` or `\n`
 - [`oct_digit0`](https://docs.rs/nom/latest/nom/character/complete/fn.oct_digit0.html): Recognizes zero or more octal characters: `[0-7]`. [`oct_digit1`](https://docs.rs/nom/latest/nom/character/complete/fn.oct_digit1.html) does the same but returns at least one character
+- [`bin_digit0`](https://docs.rs/nom/latest/nom/character/complete/fn.bin_digit0.html): Recognizes zero or more binary characters: `[0-1]`. [`bin_digit1`](https://docs.rs/nom/latest/nom/character/complete/fn.bin_digit1.html) does the same but returns at least one character
 - [`rest`](https://docs.rs/nom/latest/nom/combinator/fn.rest.html): Return the remaining input
 - [`rest_len`](https://docs.rs/nom/latest/nom/combinator/fn.rest_len.html): Return the length of the remaining input
 - [`space0`](https://docs.rs/nom/latest/nom/character/complete/fn.space0.html): Recognizes zero or more spaces and tabs. [`space1`](https://docs.rs/nom/latest/nom/character/complete/fn.space1.html) does the same but returns at least one character

diff --git a/src/bits/complete.rs b/src/bits/complete.rs
@@ -43,13 +43,13 @@ where
     if count == 0 {
       Ok(((input, bit_offset), 0u8.into()))
     } else {
-      let cnt = (count + bit_offset).div(8);
       if input.input_len() * 8 < count + bit_offset {
         Err(Err::Error(E::from_error_kind(
           (input, bit_offset),
           ErrorKind::Eof,
         )))
       } else {
+        let cnt = (count + bit_offset).div(8);
         let mut acc: O = 0_u8.into();
         let mut offset: usize = bit_offset;
         let mut remaining: usize = count;

diff --git a/src/bytes/tests.rs b/src/bytes/tests.rs
@@ -1,7 +1,7 @@
 use crate::character::is_alphabetic;
 use crate::character::streaming::{
-  alpha1 as alpha, alphanumeric1 as alphanumeric, digit1 as digit, hex_digit1 as hex_digit,
-  multispace1 as multispace, oct_digit1 as oct_digit, space1 as space,
+  alpha1 as alpha, alphanumeric1 as alphanumeric, bin_digit1 as bin_digit, digit1 as digit,
+  hex_digit1 as hex_digit, multispace1 as multispace, oct_digit1 as oct_digit, space1 as space,
 };
 use crate::error::ErrorKind;
 use crate::internal::{Err, IResult, Needed};
@@ -336,6 +336,12 @@ fn recognize() {
   let rod = yod(&b"1234567;"[..]);
   assert_eq!(rod, Ok((semicolon, &b"1234567"[..])));
 
+  fn ybd(i: &[u8]) -> IResult<&[u8], &[u8]> {
+    recognize(bin_digit).parse(i)
+  }
+  let rbd = ybd(&b"101010;"[..]);
+  assert_eq!(rbd, Ok((semicolon, &b"101010"[..])));
+
   fn yan(i: &[u8]) -> IResult<&[u8], &[u8]> {
     recognize(alphanumeric).parse(i)
   }

diff --git a/src/character/complete.rs b/src/character/complete.rs
@@ -513,6 +513,56 @@ where
   input.split_at_position1_complete(|item| !item.is_oct_digit(), ErrorKind::OctDigit)
 }
 
+/// Recognizes zero or more binary characters: 0-1
+///
+/// *Complete version*: Will return the whole input if no terminating token is found (a
+/// non-binary-digit character).
+/// # Example
+///
+/// ```
+/// # use nom::{Err, error::ErrorKind, IResult, Needed};
+/// # use nom::character::complete::bin_digit0;
+/// fn parser(input: &str) -> IResult<&str, &str> {
+///     bin_digit0(input)
+/// }
+///
+/// assert_eq!(parser("013a"), Ok(("3a", "01")));
+/// assert_eq!(parser("a013"), Ok(("a013", "")));
+/// assert_eq!(parser(""), Ok(("", "")));
+/// ```
+pub fn bin_digit0<T, E: ParseError<T>>(input: T) -> IResult<T, T, E>
+where
+  T: Input,
+  <T as Input>::Item: AsChar,
+{
+  input.split_at_position_complete(|item| !item.is_bin_digit())
+}
+
+/// Recognizes one or more binary characters: 0-1
+///
+/// *Complete version*: Will return an error if the input is empty or does not start with a binary
+/// digit, or the whole input if no terminating token is found (a non-binary-digit character).
+/// # Example
+///
+/// ```
+/// # use nom::{Err, error::{Error, ErrorKind}, IResult, Needed};
+/// # use nom::character::complete::bin_digit1;
+/// fn parser(input: &str) -> IResult<&str, &str> {
+///     bin_digit1(input)
+/// }
+///
+/// assert_eq!(parser("013a"), Ok(("3a", "01")));
+/// assert_eq!(parser("a013"), Err(Err::Error(Error::new("a013", ErrorKind::BinDigit))));
+/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::BinDigit))));
+/// ```
+pub fn bin_digit1<T, E: ParseError<T>>(input: T) -> IResult<T, T, E>
+where
+  T: Input,
+  <T as Input>::Item: AsChar,
+{
+  input.split_at_position1_complete(|item| !item.is_bin_digit(), ErrorKind::BinDigit)
+}
+
 /// Recognizes zero or more ASCII numerical and alphabetic characters: 0-9, a-z, A-Z
 ///
 /// *Complete version*: Will return the whole input if no terminating token is found (a non
@@ -864,6 +914,13 @@ mod tests {
     assert_eq!(oct_digit1::<_, (_, ErrorKind)>(b), Ok((empty, b)));
     assert_eq!(oct_digit1(c), Err(Err::Error((c, ErrorKind::OctDigit))));
     assert_eq!(oct_digit1(d), Err(Err::Error((d, ErrorKind::OctDigit))));
+    assert_eq!(bin_digit1(a), Err(Err::Error((a, ErrorKind::BinDigit))));
+    assert_eq!(
+      bin_digit1::<_, (_, ErrorKind)>(b), 
+      Ok((&b"234"[..], &b"1"[..]))
+    );
+    assert_eq!(bin_digit1(c), Err(Err::Error((c, ErrorKind::BinDigit))));
+    assert_eq!(bin_digit1(d), Err(Err::Error((d, ErrorKind::BinDigit))));
     assert_eq!(alphanumeric1::<_, (_, ErrorKind)>(a), Ok((empty, a)));
     //assert_eq!(fix_error!(b,(), alphanumeric), Ok((empty, b)));
     assert_eq!(alphanumeric1::<_, (_, ErrorKind)>(c), Ok((empty, c)));
@@ -901,6 +958,13 @@ mod tests {
     assert_eq!(oct_digit1::<_, (_, ErrorKind)>(b), Ok((empty, b)));
     assert_eq!(oct_digit1(c), Err(Err::Error((c, ErrorKind::OctDigit))));
     assert_eq!(oct_digit1(d), Err(Err::Error((d, ErrorKind::OctDigit))));
+    assert_eq!(bin_digit1(a), Err(Err::Error((a, ErrorKind::BinDigit))));
+    assert_eq!(
+      bin_digit1::<_, (_, ErrorKind)>(b), 
+      Ok(("234", "1"))
+    );
+    assert_eq!(bin_digit1(c), Err(Err::Error((c, ErrorKind::BinDigit))));
+    assert_eq!(bin_digit1(d), Err(Err::Error((d, ErrorKind::BinDigit))));
     assert_eq!(alphanumeric1::<_, (_, ErrorKind)>(a), Ok((empty, a)));
     //assert_eq!(fix_error!(b,(), alphanumeric), Ok((empty, b)));
     assert_eq!(alphanumeric1::<_, (_, ErrorKind)>(c), Ok((empty, c)));
@@ -960,6 +1024,12 @@ mod tests {
       }
       _ => panic!("wrong return type in offset test for oct_digit"),
     }
+    match bin_digit1::<_, (_, ErrorKind)>(f) {
+      Ok((i, _)) => {
+        assert_eq!(f.offset(i) + i.len(), f.len());
+      }
+      _ => panic!("wrong return type in offset test for bin_digit"),
+    }
   }
 
   #[test]
@@ -1066,6 +1136,29 @@ mod tests {
     assert!(!crate::character::is_oct_digit(b'\x60'));
   }
 
+  #[test]
+  fn bin_digit_test() {
+    let i = &b"101010;"[..];
+    assert_parse!(bin_digit1(i), Ok((&b";"[..], &i[..i.len() - 1])));
+
+    let i = &b"2"[..];
+    assert_parse!(
+      bin_digit1(i),
+      Err(Err::Error(error_position!(i, ErrorKind::BinDigit)))
+    );
+
+    assert!(crate::character::is_bin_digit(b'0'));
+    assert!(crate::character::is_bin_digit(b'1'));
+    assert!(!crate::character::is_bin_digit(b'8'));
+    assert!(!crate::character::is_bin_digit(b'9'));
+    assert!(!crate::character::is_bin_digit(b'a'));
+    assert!(!crate::character::is_bin_digit(b'A'));
+    assert!(!crate::character::is_bin_digit(b'/'));
+    assert!(!crate::character::is_bin_digit(b':'));
+    assert!(!crate::character::is_bin_digit(b'@'));
+    assert!(!crate::character::is_bin_digit(b'\x60'));
+  }
+
   #[test]
   fn full_line_windows() {
     use crate::sequence::pair;

diff --git a/src/character/mod.rs b/src/character/mod.rs
@@ -75,6 +75,22 @@ pub fn is_oct_digit(chr: u8) -> bool {
   matches!(chr, 0x30..=0x37)
 }
 
+/// Tests if byte is ASCII binary digit: 0-1
+///
+/// # Example
+///
+/// ```
+/// # use nom::character::is_bin_digit;
+/// assert_eq!(is_bin_digit(b'a'), false);
+/// assert_eq!(is_bin_digit(b'2'), false);
+/// assert_eq!(is_bin_digit(b'0'), true);
+/// assert_eq!(is_bin_digit(b'1'), true);
+/// ```
+#[inline]
+pub fn is_bin_digit(chr: u8) -> bool {
+  matches!(chr, 0x30..=0x31)
+}
+
 /// Tests if byte is ASCII alphanumeric: A-Z, a-z, 0-9
 ///
 /// # Example

diff --git a/src/character/streaming.rs b/src/character/streaming.rs
@@ -443,6 +443,48 @@ where
   input.split_at_position1(|item| !item.is_oct_digit(), ErrorKind::OctDigit)
 }
 
+/// Recognizes zero or more binary characters: 0-1
+///
+/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there's not enough input data,
+/// or if no terminating token is found (a non-binary-digit character).
+/// # Example
+///
+/// ```
+/// # use nom::{Err, error::ErrorKind, IResult, Needed};
+/// # use nom::character::streaming::bin_digit0;
+/// assert_eq!(bin_digit0::<_, (_, ErrorKind)>("013a"), Ok(("3a", "01")));
+/// assert_eq!(bin_digit0::<_, (_, ErrorKind)>("a013"), Ok(("a013", "")));
+/// assert_eq!(bin_digit0::<_, (_, ErrorKind)>(""), Err(Err::Incomplete(Needed::new(1))));
+/// ```
+pub fn bin_digit0<T, E: ParseError<T>>(input: T) -> IResult<T, T, E>
+where
+  T: Input,
+  <T as Input>::Item: AsChar,
+{
+  input.split_at_position(|item| !item.is_bin_digit())
+}
+
+/// Recognizes one or more binary characters: 0-1
+///
+/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there's not enough input data,
+/// or if no terminating token is found (a non-binary-digit character).
+/// # Example
+///
+/// ```
+/// # use nom::{Err, error::ErrorKind, IResult, Needed};
+/// # use nom::character::streaming::bin_digit1;
+/// assert_eq!(bin_digit1::<_, (_, ErrorKind)>("013a"), Ok(("3a", "01")));
+/// assert_eq!(bin_digit1::<_, (_, ErrorKind)>("a013"), Err(Err::Error(("a013", ErrorKind::BinDigit))));
+/// assert_eq!(bin_digit1::<_, (_, ErrorKind)>(""), Err(Err::Incomplete(Needed::new(1))));
+/// ```
+pub fn bin_digit1<T, E: ParseError<T>>(input: T) -> IResult<T, T, E>
+where
+  T: Input,
+  <T as Input>::Item: AsChar,
+{
+  input.split_at_position1(|item| !item.is_bin_digit(), ErrorKind::BinDigit)
+}
+
 /// Recognizes zero or more ASCII numerical and alphabetic characters: 0-9, a-z, A-Z
 ///
 /// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there's not enough input data,
@@ -795,6 +837,17 @@ mod tests {
       alphanumeric1::<_, (_, ErrorKind)>(a),
       Err(Err::Incomplete(Needed::new(1)))
     );
+    assert_eq!(bin_digit1(a), Err(Err::Error((a, ErrorKind::BinDigit))));
+    assert_eq!(
+      bin_digit1::<_, (_, ErrorKind)>(b),
+      Ok((&b"234"[..], &b"1"[..]))
+    );
+    assert_eq!(bin_digit1(c), Err(Err::Error((c, ErrorKind::BinDigit))));
+    assert_eq!(bin_digit1(d), Err(Err::Error((d, ErrorKind::BinDigit))));
+    assert_eq!(
+      alphanumeric1::<_, (_, ErrorKind)>(a),
+      Err(Err::Incomplete(Needed::new(1)))
+    );
     //assert_eq!(fix_error!(b,(), alphanumeric1), Ok((empty, b)));
     assert_eq!(
       alphanumeric1::<_, (_, ErrorKind)>(c),
@@ -854,6 +907,13 @@ mod tests {
     );
     assert_eq!(oct_digit1(c), Err(Err::Error((c, ErrorKind::OctDigit))));
     assert_eq!(oct_digit1(d), Err(Err::Error((d, ErrorKind::OctDigit))));
+    assert_eq!(bin_digit1(a), Err(Err::Error((a, ErrorKind::BinDigit))));
+    assert_eq!(
+      bin_digit1::<_, (_, ErrorKind)>(b),
+      Ok(("234", "1"))
+    );
+    assert_eq!(bin_digit1(c), Err(Err::Error((c, ErrorKind::BinDigit))));
+    assert_eq!(bin_digit1(d), Err(Err::Error((d, ErrorKind::BinDigit))));
     assert_eq!(
       alphanumeric1::<_, (_, ErrorKind)>(a),
       Err(Err::Incomplete(Needed::new(1)))
@@ -922,6 +982,12 @@ mod tests {
       }
       _ => panic!("wrong return type in offset test for oct_digit"),
     }
+    match bin_digit1::<_, (_, ErrorKind)>(f) {
+      Ok((i, _)) => {
+        assert_eq!(f.offset(i) + i.len(), f.len());
+      }
+      _ => panic!("wrong return type in offset test for bin_digit"),
+    }
   }
 
   #[test]
@@ -1034,6 +1100,29 @@ mod tests {
     assert!(!crate::character::is_oct_digit(b'\x60'));
   }
 
+  #[test]
+  fn bin_digit_test() {
+    let i = &b"01;"[..];
+    assert_parse!(bin_digit1(i), Ok((&b";"[..], &i[..i.len() - 1])));
+
+    let i = &b"8"[..];
+    assert_parse!(
+      bin_digit1(i),
+      Err(Err::Error(error_position!(i, ErrorKind::BinDigit)))
+    );
+
+    assert!(crate::character::is_bin_digit(b'0'));
+    assert!(crate::character::is_bin_digit(b'1'));
+    assert!(!crate::character::is_bin_digit(b'8'));
+    assert!(!crate::character::is_bin_digit(b'9'));
+    assert!(!crate::character::is_bin_digit(b'a'));
+    assert!(!crate::character::is_bin_digit(b'A'));
+    assert!(!crate::character::is_bin_digit(b'/'));
+    assert!(!crate::character::is_bin_digit(b':'));
+    assert!(!crate::character::is_bin_digit(b'@'));
+    assert!(!crate::character::is_bin_digit(b'\x60'));
+  }
+
   #[test]
   fn full_line_windows() {
     fn take_full_line(i: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> {