From 369484a8b5b26597d516106b39b8ef1bad33e9eb Mon Sep 17 00:00:00 2001 From: Tristan F Date: Sat, 21 Oct 2023 07:51:52 -0400 Subject: [PATCH 1/8] docs(readme): correct dead sozu-proxy/sozu link (#1697) --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5d984fa71..46d4408a7 100644 --- a/README.md +++ b/README.md @@ -140,7 +140,7 @@ formats such as JSON, nom can manage it, and provides you with useful tools: Example projects: -- [HTTP proxy](https://github.com/sozu-proxy/sozu/tree/main/lib/src/protocol/http/parser) +- [HTTP proxy](https://github.com/sozu-proxy/sozu/blob/main/lib/src/protocol/h2/parser.rs) - [TOML parser](https://github.com/joelself/tomllib) ### Programming language parsers @@ -175,7 +175,7 @@ It allows you to build powerful, deterministic state machines for your protocols Example projects: -- [HTTP proxy](https://github.com/sozu-proxy/sozu/tree/main/lib/src/protocol/http/parser) +- [HTTP proxy](https://github.com/sozu-proxy/sozu/blob/main/lib/src/protocol/h2/parser.rs) - [Using nom with generators](https://github.com/rust-bakery/generator_nom) ## Parser combinators From ff8cc9e325897a5e8ac9f42e8a5be08c4b13b42e Mon Sep 17 00:00:00 2001 From: Federico Miras Date: Sat, 21 Oct 2023 08:54:41 -0300 Subject: [PATCH 2/8] docs: add PotterScript to README parsers (#1685) --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 46d4408a7..928679bf4 100644 --- a/README.md +++ b/README.md @@ -278,7 +278,8 @@ Here is a (non exhaustive) list of known projects using nom: [CSML](https://github.com/CSML-by-Clevy/csml-engine/tree/dev/csml_interpreter), [Wasm](https://github.com/fabrizio-m/wasm-nom), [Pseudocode](https://github.com/Gungy2/pseudocod), -[Filter for MeiliSearch](https://github.com/meilisearch/meilisearch) +[Filter for MeiliSearch](https://github.com/meilisearch/meilisearch), +[PotterScript](https://github.com/fmiras/potterscript) - Interface definition formats: [Thrift](https://github.com/thehydroimpulse/thrust) - Audio, video and image formats: [GIF](https://github.com/Geal/gif.rs), From 3c2b2e10a1d790f0f24c15c6f6db01c081295c16 Mon Sep 17 00:00:00 2001 From: Taras Tsugrii Date: Sat, 21 Oct 2023 05:02:11 -0700 Subject: [PATCH 3/8] [nit] Move cnt definition to the branch it's used in. (#1683) It's not a big deal and compiler should be able to do this automatically, but this also reduces the scope of the definition. --- src/bits/complete.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/bits/complete.rs b/src/bits/complete.rs index d8aaa2d5d..d133bccb0 100644 --- a/src/bits/complete.rs +++ b/src/bits/complete.rs @@ -43,13 +43,13 @@ where if count == 0 { Ok(((input, bit_offset), 0u8.into())) } else { - let cnt = (count + bit_offset).div(8); if input.input_len() * 8 < count + bit_offset { Err(Err::Error(E::from_error_kind( (input, bit_offset), ErrorKind::Eof, ))) } else { + let cnt = (count + bit_offset).div(8); let mut acc: O = 0_u8.into(); let mut offset: usize = bit_offset; let mut remaining: usize = count; From 671f77086dd78013d56e6b82805f7e5e9d83972f Mon Sep 17 00:00:00 2001 From: Yann Prono Date: Sat, 21 Oct 2023 14:03:02 +0200 Subject: [PATCH 4/8] add Kconfig parser (#1681) Co-authored-by: Yann Prono --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 928679bf4..c6c99a3b2 100644 --- a/README.md +++ b/README.md @@ -264,7 +264,8 @@ Here is a (non exhaustive) list of known projects using nom: [PDB](https://github.com/TianyiShi2001/nom-pdb), [proto files](https://github.com/tafia/protobuf-parser), [Fountain screenplay markup](https://github.com/adamchalmers/fountain-rs), -[vimwiki](https://github.com/chipsenkbeil/vimwiki-rs/tree/master/vimwiki) & [vimwiki_macros](https://github.com/chipsenkbeil/vimwiki-rs/tree/master/vimwiki_macros) +[vimwiki](https://github.com/chipsenkbeil/vimwiki-rs/tree/master/vimwiki) & [vimwiki_macros](https://github.com/chipsenkbeil/vimwiki-rs/tree/master/vimwiki_macros), +[Kconfig language](https://github.com/Mcdostone/nom-kconfig) - Programming languages: [PHP](https://github.com/tagua-vm/parser), [Basic Calculator](https://github.com/balajisivaraman/basic_calculator_rs), From 2857a9af41f3fb1797c2debca7c10992517aa8b1 Mon Sep 17 00:00:00 2001 From: Quentin Dufour Date: Sat, 21 Oct 2023 14:04:07 +0200 Subject: [PATCH 5/8] Add eml-codec, imap-codec, smtp-message to README (#1680) --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index c6c99a3b2..5bbb0b301 100644 --- a/README.md +++ b/README.md @@ -253,6 +253,7 @@ Here is a (non exhaustive) list of known projects using nom: - Text file formats: [Ceph Crush](https://github.com/cholcombe973/crushtool), [Cronenberg](https://github.com/ayrat555/cronenberg), +[Email](https://github.com/deuxfleurs-org/eml-codec), [XFS Runtime Stats](https://github.com/ChrisMacNaughton/xfs-rs), [CSV](https://github.com/GuillaumeGomez/csv-parser), [FASTA](https://github.com/TianyiShi2001/nom-fasta), @@ -301,7 +302,7 @@ Here is a (non exhaustive) list of known projects using nom: [DHCP](https://github.com/rusticata/dhcp-parser), [HTTP](https://github.com/sozu-proxy/sozu/tree/main/lib/src/protocol/http), [URI](https://github.com/santifa/rrp/blob/master/src/uri.rs), -[IMAP](https://github.com/djc/tokio-imap), +[IMAP](https://github.com/djc/tokio-imap) ([alt](https://github.com/duesee/imap-codec)), [IRC](https://github.com/Detegr/RBot-parser), [Pcap-NG](https://github.com/richo/pcapng-rs), [Pcap](https://github.com/ithinuel/pcap-rs), @@ -315,6 +316,7 @@ Here is a (non exhaustive) list of known projects using nom: [IPFIX / Netflow v10](https://github.com/dominotree/rs-ipfix), [GTP](https://github.com/fuerstenau/gorrosion-gtp), [SIP](https://github.com/kurotych/sipcore/tree/master/crates/sipmsg), +[SMTP](https://github.com/Ekleog/kannader), [Prometheus](https://github.com/vectordotdev/vector/blob/master/lib/prometheus-parser/src/line.rs) - Language specifications: [BNF](https://github.com/shnewto/bnf) From 95d992633620e2db1000481e8fb224d3d5c813b7 Mon Sep 17 00:00:00 2001 From: rrupy <147432801+rrupy@users.noreply.github.com> Date: Sat, 21 Oct 2023 12:04:21 +0000 Subject: [PATCH 6/8] fix "using `.clone()` on a double reference" warnings (#1700) Co-authored-by: rrupy --- src/traits.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/traits.rs b/src/traits.rs index 9f74f8c73..30ac9eb75 100644 --- a/src/traits.rs +++ b/src/traits.rs @@ -333,7 +333,7 @@ impl<'a> Input for &'a [u8] { { match self.iter().position(|c| predicate(*c)) { Some(0) => Err(Err::Error(OM::Error::bind(|| { - E::from_error_kind(self.clone(), e) + E::from_error_kind(self, e) }))), Some(n) => Ok((self.take_from(n), OM::Output::bind(|| self.take(n)))), None => { @@ -341,7 +341,7 @@ impl<'a> Input for &'a [u8] { Err(Err::Incomplete(Needed::new(1))) } else if self.is_empty() { Err(Err::Error(OM::Error::bind(|| { - E::from_error_kind(self.clone(), e) + E::from_error_kind(self, e) }))) } else { Ok(( @@ -530,7 +530,7 @@ impl<'a> Input for &'a str { { match self.find(predicate) { Some(0) => Err(Err::Error(OM::Error::bind(|| { - E::from_error_kind(self.clone(), e) + E::from_error_kind(self, e) }))), Some(n) => unsafe { // find() returns a byte index that is already in the slice at a char boundary @@ -544,7 +544,7 @@ impl<'a> Input for &'a str { Err(Err::Incomplete(Needed::new(1))) } else if self.len() == 0 { Err(Err::Error(OM::Error::bind(|| { - E::from_error_kind(self.clone(), e) + E::from_error_kind(self, e) }))) } else { // the end of slice is a char boundary From 0c5d5b9a74d999fbc44ee8eea85ad8a6aabfa450 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=A5=9D=E5=81=A5=E8=81=AA?= <68471954+Chasing1020@users.noreply.github.com> Date: Sat, 21 Oct 2023 20:06:15 +0800 Subject: [PATCH 7/8] Add is_bin_digit method (#1658) --- doc/choosing_a_combinator.md | 2 + src/bytes/tests.rs | 8 +++- src/character/complete.rs | 93 ++++++++++++++++++++++++++++++++++++ src/character/mod.rs | 16 +++++++ src/character/streaming.rs | 89 ++++++++++++++++++++++++++++++++++ src/error.rs | 3 ++ src/traits.rs | 18 +++++++ 7 files changed, 228 insertions(+), 1 deletion(-) diff --git a/doc/choosing_a_combinator.md b/doc/choosing_a_combinator.md index fa78fc36d..b6073cf14 100644 --- a/doc/choosing_a_combinator.md +++ b/doc/choosing_a_combinator.md @@ -131,6 +131,7 @@ Use these functions with a combinator like `take_while`: - [`is_digit`](https://docs.rs/nom/latest/nom/character/fn.is_digit.html): Tests if byte is ASCII digit: `[0-9]` - [`is_hex_digit`](https://docs.rs/nom/latest/nom/character/fn.is_hex_digit.html): Tests if byte is ASCII hex digit: `[0-9A-Fa-f]` - [`is_oct_digit`](https://docs.rs/nom/latest/nom/character/fn.is_oct_digit.html): Tests if byte is ASCII octal digit: `[0-7]` +- [`is_bin_digit`](https://docs.rs/nom/latest/nom/character/fn.is_bin_digit.html): Tests if byte is ASCII binary digit: `[0-1]` - [`is_space`](https://docs.rs/nom/latest/nom/character/fn.is_space.html): Tests if byte is ASCII space or tab: `[ \t]` - [`is_newline`](https://docs.rs/nom/latest/nom/character/fn.is_newline.html): Tests if byte is ASCII newline: `[\n]` @@ -150,6 +151,7 @@ Alternatively there are ready to use functions: - [`newline`](https://docs.rs/nom/latest/nom/character/complete/fn.newline.html): Matches a newline character `\n` - [`not_line_ending`](https://docs.rs/nom/latest/nom/character/complete/fn.not_line_ending.html): Recognizes a string of any char except `\r` or `\n` - [`oct_digit0`](https://docs.rs/nom/latest/nom/character/complete/fn.oct_digit0.html): Recognizes zero or more octal characters: `[0-7]`. [`oct_digit1`](https://docs.rs/nom/latest/nom/character/complete/fn.oct_digit1.html) does the same but returns at least one character +- [`bin_digit0`](https://docs.rs/nom/latest/nom/character/complete/fn.bin_digit0.html): Recognizes zero or more binary characters: `[0-1]`. [`bin_digit1`](https://docs.rs/nom/latest/nom/character/complete/fn.bin_digit1.html) does the same but returns at least one character - [`rest`](https://docs.rs/nom/latest/nom/combinator/fn.rest.html): Return the remaining input - [`rest_len`](https://docs.rs/nom/latest/nom/combinator/fn.rest_len.html): Return the length of the remaining input - [`space0`](https://docs.rs/nom/latest/nom/character/complete/fn.space0.html): Recognizes zero or more spaces and tabs. [`space1`](https://docs.rs/nom/latest/nom/character/complete/fn.space1.html) does the same but returns at least one character diff --git a/src/bytes/tests.rs b/src/bytes/tests.rs index 25e535a4e..e7e9becf7 100644 --- a/src/bytes/tests.rs +++ b/src/bytes/tests.rs @@ -1,7 +1,7 @@ use crate::character::is_alphabetic; use crate::character::streaming::{ alpha1 as alpha, alphanumeric1 as alphanumeric, digit1 as digit, hex_digit1 as hex_digit, - multispace1 as multispace, oct_digit1 as oct_digit, space1 as space, + multispace1 as multispace, oct_digit1 as oct_digit, bin_digit1 as bin_digit, space1 as space, }; use crate::error::ErrorKind; use crate::internal::{Err, IResult, Needed}; @@ -336,6 +336,12 @@ fn recognize() { let rod = yod(&b"1234567;"[..]); assert_eq!(rod, Ok((semicolon, &b"1234567"[..]))); + fn ybd(i: &[u8]) -> IResult<&[u8], &[u8]> { + recognize(bin_digit)(i) + } + let rbd = ybd(&b"101010;"[..]); + assert_eq!(rbd, Ok((semicolon, &b"101010"[..]))); + fn yan(i: &[u8]) -> IResult<&[u8], &[u8]> { recognize(alphanumeric).parse(i) } diff --git a/src/character/complete.rs b/src/character/complete.rs index fa08c9222..d8045c9f2 100644 --- a/src/character/complete.rs +++ b/src/character/complete.rs @@ -513,6 +513,56 @@ where input.split_at_position1_complete(|item| !item.is_oct_digit(), ErrorKind::OctDigit) } +/// Recognizes zero or more binary characters: 0-1 +/// +/// *Complete version*: Will return the whole input if no terminating token is found (a non binary +/// digit character). +/// # Example +/// +/// ``` +/// # use nom::{Err, error::ErrorKind, IResult, Needed}; +/// # use nom::character::complete::bin_digit0; +/// fn parser(input: &str) -> IResult<&str, &str> { +/// bin_digit0(input) +/// } +/// +/// assert_eq!(parser("013a"), Ok(("3a", "01"))); +/// assert_eq!(parser("a013"), Ok(("a013", ""))); +/// assert_eq!(parser(""), Ok(("", ""))); +/// ``` +pub fn bin_digit0>(input: T) -> IResult +where + T: Input, + ::Item: AsChar, +{ + input.split_at_position_complete(|item| !item.is_bin_digit()) +} + +/// Recognizes one or more binary characters: 0-1 +/// +/// *Complete version*: Will return an error if there's not enough input data, +/// or the whole input if no terminating token is found (a non binary digit character). +/// # Example +/// +/// ``` +/// # use nom::{Err, error::{Error, ErrorKind}, IResult, Needed}; +/// # use nom::character::complete::bin_digit1; +/// fn parser(input: &str) -> IResult<&str, &str> { +/// bin_digit1(input) +/// } +/// +/// assert_eq!(parser("013a"), Ok(("3a", "01"))); +/// assert_eq!(parser("a013"), Err(Err::Error(Error::new("a013", ErrorKind::BinDigit)))); +/// assert_eq!(parser(""), Err(Err::Error(Error::new("", ErrorKind::BinDigit)))); +/// ``` +pub fn bin_digit1>(input: T) -> IResult +where + T: Input, + ::Item: AsChar, +{ + input.split_at_position1_complete(|item| !item.is_bin_digit(), ErrorKind::BinDigit) +} + /// Recognizes zero or more ASCII numerical and alphabetic characters: 0-9, a-z, A-Z /// /// *Complete version*: Will return the whole input if no terminating token is found (a non @@ -864,6 +914,13 @@ mod tests { assert_eq!(oct_digit1::<_, (_, ErrorKind)>(b), Ok((empty, b))); assert_eq!(oct_digit1(c), Err(Err::Error((c, ErrorKind::OctDigit)))); assert_eq!(oct_digit1(d), Err(Err::Error((d, ErrorKind::OctDigit)))); + assert_eq!(bin_digit1(a), Err(Err::Error((a, ErrorKind::BinDigit)))); + assert_eq!( + bin_digit1::<_, (_, ErrorKind)>(b), + Ok((&b"234"[..], &b"1"[..])) + ); + assert_eq!(bin_digit1(c), Err(Err::Error((c, ErrorKind::BinDigit)))); + assert_eq!(bin_digit1(d), Err(Err::Error((d, ErrorKind::BinDigit)))); assert_eq!(alphanumeric1::<_, (_, ErrorKind)>(a), Ok((empty, a))); //assert_eq!(fix_error!(b,(), alphanumeric), Ok((empty, b))); assert_eq!(alphanumeric1::<_, (_, ErrorKind)>(c), Ok((empty, c))); @@ -901,6 +958,13 @@ mod tests { assert_eq!(oct_digit1::<_, (_, ErrorKind)>(b), Ok((empty, b))); assert_eq!(oct_digit1(c), Err(Err::Error((c, ErrorKind::OctDigit)))); assert_eq!(oct_digit1(d), Err(Err::Error((d, ErrorKind::OctDigit)))); + assert_eq!(bin_digit1(a), Err(Err::Error((a, ErrorKind::BinDigit)))); + assert_eq!( + bin_digit1::<_, (_, ErrorKind)>(b), + Ok(("234", "1")) + ); + assert_eq!(bin_digit1(c), Err(Err::Error((c, ErrorKind::BinDigit)))); + assert_eq!(bin_digit1(d), Err(Err::Error((d, ErrorKind::BinDigit)))); assert_eq!(alphanumeric1::<_, (_, ErrorKind)>(a), Ok((empty, a))); //assert_eq!(fix_error!(b,(), alphanumeric), Ok((empty, b))); assert_eq!(alphanumeric1::<_, (_, ErrorKind)>(c), Ok((empty, c))); @@ -960,6 +1024,12 @@ mod tests { } _ => panic!("wrong return type in offset test for oct_digit"), } + match bin_digit1::<_, (_, ErrorKind)>(f) { + Ok((i, _)) => { + assert_eq!(f.offset(i) + i.len(), f.len()); + } + _ => panic!("wrong return type in offset test for bin_digit"), + } } #[test] @@ -1066,6 +1136,29 @@ mod tests { assert!(!crate::character::is_oct_digit(b'\x60')); } + #[test] + fn bin_digit_test() { + let i = &b"101010;"[..]; + assert_parse!(bin_digit1(i), Ok((&b";"[..], &i[..i.len() - 1]))); + + let i = &b"2"[..]; + assert_parse!( + bin_digit1(i), + Err(Err::Error(error_position!(i, ErrorKind::BinDigit))) + ); + + assert!(crate::character::is_bin_digit(b'0')); + assert!(crate::character::is_bin_digit(b'1')); + assert!(!crate::character::is_bin_digit(b'8')); + assert!(!crate::character::is_bin_digit(b'9')); + assert!(!crate::character::is_bin_digit(b'a')); + assert!(!crate::character::is_bin_digit(b'A')); + assert!(!crate::character::is_bin_digit(b'/')); + assert!(!crate::character::is_bin_digit(b':')); + assert!(!crate::character::is_bin_digit(b'@')); + assert!(!crate::character::is_bin_digit(b'\x60')); + } + #[test] fn full_line_windows() { use crate::sequence::pair; diff --git a/src/character/mod.rs b/src/character/mod.rs index 400e96485..87df8191c 100644 --- a/src/character/mod.rs +++ b/src/character/mod.rs @@ -75,6 +75,22 @@ pub fn is_oct_digit(chr: u8) -> bool { matches!(chr, 0x30..=0x37) } +/// Tests if byte is ASCII binary digit: 0-1 +/// +/// # Example +/// +/// ``` +/// # use nom::character::is_bin_digit; +/// assert_eq!(is_bin_digit(b'a'), false); +/// assert_eq!(is_bin_digit(b'2'), false); +/// assert_eq!(is_bin_digit(b'0'), true); +/// assert_eq!(is_bin_digit(b'1'), true); +/// ``` +#[inline] +pub fn is_bin_digit(chr: u8) -> bool { + matches!(chr, 0x30..=0x31) +} + /// Tests if byte is ASCII alphanumeric: A-Z, a-z, 0-9 /// /// # Example diff --git a/src/character/streaming.rs b/src/character/streaming.rs index 1536c0e91..a3d03e229 100644 --- a/src/character/streaming.rs +++ b/src/character/streaming.rs @@ -443,6 +443,48 @@ where input.split_at_position1(|item| !item.is_oct_digit(), ErrorKind::OctDigit) } +/// Recognizes zero or more binary characters: 0-1 +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there's not enough input data, +/// or if no terminating token is found (a non binary digit character). +/// # Example +/// +/// ``` +/// # use nom::{Err, error::ErrorKind, IResult, Needed}; +/// # use nom::character::streaming::bin_digit0; +/// assert_eq!(bin_digit0::<_, (_, ErrorKind)>("013a"), Ok(("3a", "01"))); +/// assert_eq!(bin_digit0::<_, (_, ErrorKind)>("a013"), Ok(("a013", ""))); +/// assert_eq!(bin_digit0::<_, (_, ErrorKind)>(""), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +pub fn bin_digit0>(input: T) -> IResult +where + T: Input, + ::Item: AsChar, +{ + input.split_at_position(|item| !item.is_bin_digit()) +} + +/// Recognizes one or more binary characters: 0-1 +/// +/// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there's not enough input data, +/// or if no terminating token is found (a non binary digit character). +/// # Example +/// +/// ``` +/// # use nom::{Err, error::ErrorKind, IResult, Needed}; +/// # use nom::character::streaming::bin_digit1; +/// assert_eq!(bin_digit1::<_, (_, ErrorKind)>("013a"), Ok(("3a", "01"))); +/// assert_eq!(bin_digit1::<_, (_, ErrorKind)>("a013"), Err(Err::Error(("a013", ErrorKind::BinDigit)))); +/// assert_eq!(bin_digit1::<_, (_, ErrorKind)>(""), Err(Err::Incomplete(Needed::new(1)))); +/// ``` +pub fn bin_digit1>(input: T) -> IResult +where + T: Input, + ::Item: AsChar, +{ + input.split_at_position1(|item| !item.is_bin_digit(), ErrorKind::BinDigit) +} + /// Recognizes zero or more ASCII numerical and alphabetic characters: 0-9, a-z, A-Z /// /// *Streaming version*: Will return `Err(nom::Err::Incomplete(_))` if there's not enough input data, @@ -795,6 +837,17 @@ mod tests { alphanumeric1::<_, (_, ErrorKind)>(a), Err(Err::Incomplete(Needed::new(1))) ); + assert_eq!(bin_digit1(a), Err(Err::Error((a, ErrorKind::BinDigit)))); + assert_eq!( + bin_digit1::<_, (_, ErrorKind)>(b), + Ok((&b"234"[..], &b"1"[..])) + ); + assert_eq!(bin_digit1(c), Err(Err::Error((c, ErrorKind::BinDigit)))); + assert_eq!(bin_digit1(d), Err(Err::Error((d, ErrorKind::BinDigit)))); + assert_eq!( + alphanumeric1::<_, (_, ErrorKind)>(a), + Err(Err::Incomplete(Needed::new(1))) + ); //assert_eq!(fix_error!(b,(), alphanumeric1), Ok((empty, b))); assert_eq!( alphanumeric1::<_, (_, ErrorKind)>(c), @@ -854,6 +907,13 @@ mod tests { ); assert_eq!(oct_digit1(c), Err(Err::Error((c, ErrorKind::OctDigit)))); assert_eq!(oct_digit1(d), Err(Err::Error((d, ErrorKind::OctDigit)))); + assert_eq!(bin_digit1(a), Err(Err::Error((a, ErrorKind::BinDigit)))); + assert_eq!( + bin_digit1::<_, (_, ErrorKind)>(b), + Ok(("234", "1")) + ); + assert_eq!(bin_digit1(c), Err(Err::Error((c, ErrorKind::BinDigit)))); + assert_eq!(bin_digit1(d), Err(Err::Error((d, ErrorKind::BinDigit)))); assert_eq!( alphanumeric1::<_, (_, ErrorKind)>(a), Err(Err::Incomplete(Needed::new(1))) @@ -922,6 +982,12 @@ mod tests { } _ => panic!("wrong return type in offset test for oct_digit"), } + match bin_digit1::<_, (_, ErrorKind)>(f) { + Ok((i, _)) => { + assert_eq!(f.offset(i) + i.len(), f.len()); + } + _ => panic!("wrong return type in offset test for bin_digit"), + } } #[test] @@ -1034,6 +1100,29 @@ mod tests { assert!(!crate::character::is_oct_digit(b'\x60')); } + #[test] + fn bin_digit_test() { + let i = &b"01;"[..]; + assert_parse!(bin_digit1(i), Ok((&b";"[..], &i[..i.len() - 1]))); + + let i = &b"8"[..]; + assert_parse!( + bin_digit1(i), + Err(Err::Error(error_position!(i, ErrorKind::BinDigit))) + ); + + assert!(crate::character::is_bin_digit(b'0')); + assert!(crate::character::is_bin_digit(b'1')); + assert!(!crate::character::is_bin_digit(b'8')); + assert!(!crate::character::is_bin_digit(b'9')); + assert!(!crate::character::is_bin_digit(b'a')); + assert!(!crate::character::is_bin_digit(b'A')); + assert!(!crate::character::is_bin_digit(b'/')); + assert!(!crate::character::is_bin_digit(b':')); + assert!(!crate::character::is_bin_digit(b'@')); + assert!(!crate::character::is_bin_digit(b'\x60')); + } + #[test] fn full_line_windows() { fn take_full_line(i: &[u8]) -> IResult<&[u8], (&[u8], &[u8])> { diff --git a/src/error.rs b/src/error.rs index 22344b5c3..067187d54 100644 --- a/src/error.rs +++ b/src/error.rs @@ -436,6 +436,7 @@ pub enum ErrorKind { Digit, HexDigit, OctDigit, + BinDigit, AlphaNumeric, Space, MultiSpace, @@ -534,6 +535,7 @@ pub fn error_to_u32(e: &ErrorKind) -> u32 { ErrorKind::Fail => 75, ErrorKind::Many => 76, ErrorKind::Fold => 77, + ErrorKind::BinDigit => 78, } } @@ -584,6 +586,7 @@ impl ErrorKind { ErrorKind::ManyMN => "Many(m, n)", ErrorKind::HexDigit => "Hexadecimal Digit", ErrorKind::OctDigit => "Octal digit", + ErrorKind::BinDigit => "Binary digit", ErrorKind::Not => "Negation", ErrorKind::Permutation => "Permutation", ErrorKind::ManyTill => "ManyTill", diff --git a/src/traits.rs b/src/traits.rs index 30ac9eb75..fd3163dbf 100644 --- a/src/traits.rs +++ b/src/traits.rs @@ -701,6 +701,8 @@ pub trait AsChar: Copy { fn is_hex_digit(self) -> bool; /// Tests that self is an octal digit fn is_oct_digit(self) -> bool; + /// Tests that self is a binary digit + fn is_bin_digit(self) -> bool; /// Gets the len in bytes for self fn len(self) -> usize; } @@ -731,6 +733,10 @@ impl AsChar for u8 { matches!(self, 0x30..=0x37) } #[inline] + fn is_bin_digit(self) -> bool { + matches!(self, 0x30..=0x31) + } + #[inline] fn len(self) -> usize { 1 } @@ -761,6 +767,10 @@ impl<'a> AsChar for &'a u8 { matches!(*self, 0x30..=0x37) } #[inline] + fn is_bin_digit(self) -> bool { + matches!(*self, 0x30..=0x31) + } + #[inline] fn len(self) -> usize { 1 } @@ -792,6 +802,10 @@ impl AsChar for char { self.is_digit(8) } #[inline] + fn is_bin_digit(self) -> bool { + self.is_digit(2) + } + #[inline] fn len(self) -> usize { self.len_utf8() } @@ -823,6 +837,10 @@ impl<'a> AsChar for &'a char { self.is_digit(8) } #[inline] + fn is_bin_digit(self) -> bool { + self.is_digit(2) + } + #[inline] fn len(self) -> usize { self.len_utf8() } From d4318f6669f1509eb1c352f528811fdbeaf1623a Mon Sep 17 00:00:00 2001 From: Geoffroy Couprie Date: Sat, 21 Oct 2023 14:17:47 +0200 Subject: [PATCH 8/8] Fix bin_digit test (#1701) --- src/bytes/tests.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/bytes/tests.rs b/src/bytes/tests.rs index e7e9becf7..15106bf32 100644 --- a/src/bytes/tests.rs +++ b/src/bytes/tests.rs @@ -1,7 +1,7 @@ use crate::character::is_alphabetic; use crate::character::streaming::{ - alpha1 as alpha, alphanumeric1 as alphanumeric, digit1 as digit, hex_digit1 as hex_digit, - multispace1 as multispace, oct_digit1 as oct_digit, bin_digit1 as bin_digit, space1 as space, + alpha1 as alpha, alphanumeric1 as alphanumeric, bin_digit1 as bin_digit, digit1 as digit, + hex_digit1 as hex_digit, multispace1 as multispace, oct_digit1 as oct_digit, space1 as space, }; use crate::error::ErrorKind; use crate::internal::{Err, IResult, Needed}; @@ -337,7 +337,7 @@ fn recognize() { assert_eq!(rod, Ok((semicolon, &b"1234567"[..]))); fn ybd(i: &[u8]) -> IResult<&[u8], &[u8]> { - recognize(bin_digit)(i) + recognize(bin_digit).parse(i) } let rbd = ybd(&b"101010;"[..]); assert_eq!(rbd, Ok((semicolon, &b"101010"[..])));