From a00625ec81e443e6324fbab8e3052d00501912b1 Mon Sep 17 00:00:00 2001 From: MucTepDayH16 Date: Thu, 13 Apr 2023 01:04:04 +0300 Subject: [PATCH 1/4] ExactSizeIterator impl for pair iterators Signed-off-by: MucTepDayH16 --- pest/src/iterators/flat_pairs.rs | 39 ++++++++++++++++++++++++ pest/src/iterators/pairs.rs | 42 ++++++++++++++++++++++++++ pest/src/iterators/tokens.rs | 29 ++++++++++++++++++ pest/src/span.rs | 51 ++++++++++++++++++++++++++++++++ 4 files changed, 161 insertions(+) diff --git a/pest/src/iterators/flat_pairs.rs b/pest/src/iterators/flat_pairs.rs index 52a20740..7fd94c40 100644 --- a/pest/src/iterators/flat_pairs.rs +++ b/pest/src/iterators/flat_pairs.rs @@ -102,6 +102,22 @@ impl<'i, R: RuleType> FlatPairs<'i, R> { } } +impl<'i, R: RuleType> ExactSizeIterator for FlatPairs<'i, R> { + fn len(&self) -> usize { + let mut start = self.start; + let mut count = 0; + while start < self.end { + start += 1; + while start < self.end && !self.is_start(start) { + start += 1; + } + + count += 1; + } + count + } +} + impl<'i, R: RuleType> Iterator for FlatPairs<'i, R> { type Item = Pair<'i, R>; @@ -122,6 +138,11 @@ impl<'i, R: RuleType> Iterator for FlatPairs<'i, R> { Some(pair) } + + fn size_hint(&self) -> (usize, Option) { + let len = ::len(self); + (len, Some(len)) + } } impl<'i, R: RuleType> DoubleEndedIterator for FlatPairs<'i, R> { @@ -214,4 +235,22 @@ mod tests { assert_eq!(pair.line_col(), (1, 5)); assert_eq!(pair.line_col(), pair.as_span().start_pos().line_col()); } + + #[test] + fn exact_size_iter_for_pairs() { + let pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().flatten(); + let pairs_len = pairs.len(); + let pairs = pairs.collect::>(); + assert_eq!(pairs.len(), pairs_len); + + let pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().flatten().rev(); + let pairs_len = pairs.len(); + let pairs = pairs.collect::>(); + assert_eq!(pairs.len(), pairs_len); + + let mut pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().flatten(); + let pairs_len = pairs.len(); + let _ = pairs.next().unwrap(); + assert_eq!(pairs.len() + 1, pairs_len); + } } diff --git a/pest/src/iterators/pairs.rs b/pest/src/iterators/pairs.rs index c21a7fae..bcc6e843 100644 --- a/pest/src/iterators/pairs.rs +++ b/pest/src/iterators/pairs.rs @@ -237,6 +237,23 @@ impl<'i, R: RuleType> Pairs<'i, R> { } } +impl<'i, R: RuleType> ExactSizeIterator for Pairs<'i, R> { + fn len(&self) -> usize { + let mut start = self.start; + let mut count = 0; + while start < self.end { + start = match self.queue[start] { + QueueableToken::Start { + end_token_index, .. + } => end_token_index + 1, + _ => unreachable!(), + }; + count += 1; + } + count + } +} + impl<'i, R: RuleType> Iterator for Pairs<'i, R> { type Item = Pair<'i, R>; @@ -246,6 +263,11 @@ impl<'i, R: RuleType> Iterator for Pairs<'i, R> { self.start = self.pair() + 1; Some(pair) } + + fn size_hint(&self) -> (usize, Option) { + let len = ::len(self); + (len, Some(len)) + } } impl<'i, R: RuleType> DoubleEndedIterator for Pairs<'i, R> { @@ -327,6 +349,8 @@ impl<'i, R: RuleType> ::serde::Serialize for Pairs<'i, R> { #[cfg(test)] mod tests { + use crate::iterators::Pair; + use super::super::super::macros::tests::*; use super::super::super::Parser; use alloc::borrow::ToOwned; @@ -479,4 +503,22 @@ mod tests { assert_eq!(pair.as_str(), "abc"); assert_eq!(pair.line_col(), (1, 1)); } + + #[test] + fn exact_size_iter_for_pairs() { + let pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap(); + let pairs_len = pairs.len(); + let pairs = pairs.collect::>>(); + assert_eq!(pairs.len(), pairs_len); + + let pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().rev(); + let pairs_len = pairs.len(); + let pairs = pairs.collect::>>(); + assert_eq!(pairs.len(), pairs_len); + + let mut pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap(); + let pairs_len = pairs.len(); + let _ = pairs.next().unwrap(); + assert_eq!(pairs.len() + 1, pairs_len); + } } diff --git a/pest/src/iterators/tokens.rs b/pest/src/iterators/tokens.rs index 0d462711..5be92080 100644 --- a/pest/src/iterators/tokens.rs +++ b/pest/src/iterators/tokens.rs @@ -92,6 +92,12 @@ impl<'i, R: RuleType> Tokens<'i, R> { } } +impl<'i, R: RuleType> ExactSizeIterator for Tokens<'i, R> { + fn len(&self) -> usize { + self.end - self.start + } +} + impl<'i, R: RuleType> Iterator for Tokens<'i, R> { type Item = Token<'i, R>; @@ -106,6 +112,11 @@ impl<'i, R: RuleType> Iterator for Tokens<'i, R> { Some(token) } + + fn size_hint(&self) -> (usize, Option) { + let len = ::len(self); + (len, Some(len)) + } } impl<'i, R: RuleType> DoubleEndedIterator for Tokens<'i, R> { @@ -143,4 +154,22 @@ mod tests { let reverse_tokens = pairs.tokens().rev().collect::>>(); assert_eq!(tokens, reverse_tokens); } + + #[test] + fn exact_size_iter_for_tokens() { + let tokens = AbcParser::parse(Rule::a, "abcde").unwrap().tokens(); + let tokens_len = tokens.len(); + let tokens = tokens.collect::>>(); + assert_eq!(tokens.len(), tokens_len); + + let tokens = AbcParser::parse(Rule::a, "abcde").unwrap().tokens().rev(); + let tokens_len = tokens.len(); + let tokens = tokens.collect::>>(); + assert_eq!(tokens.len(), tokens_len); + + let mut tokens = AbcParser::parse(Rule::a, "abcde").unwrap().tokens(); + let tokens_len = tokens.len(); + let _ = tokens.next().unwrap(); + assert_eq!(tokens.len() + 1, tokens_len); + } } diff --git a/pest/src/span.rs b/pest/src/span.rs index 7603c43c..fa4487b5 100644 --- a/pest/src/span.rs +++ b/pest/src/span.rs @@ -298,6 +298,26 @@ pub struct LinesSpan<'i> { pos: usize, } +impl<'i> ExactSizeIterator for LinesSpan<'i> { + fn len(&self) -> usize { + let mut self_pos = self.pos; + let mut count = 0; + while self_pos < self.span.end { + let pos = match position::Position::new(self.span.input, self_pos) { + Some(pos) => pos, + None => break, + }; + if pos.at_end() { + break; + } + + self_pos = pos.find_line_end(); + count += 1; + } + count + } +} + impl<'i> Iterator for LinesSpan<'i> { type Item = Span<'i>; fn next(&mut self) -> Option { @@ -314,6 +334,11 @@ impl<'i> Iterator for LinesSpan<'i> { Span::new(self.span.input, line_start, self.pos) } + + fn size_hint(&self) -> (usize, Option) { + let len = ::len(self); + (len, Some(len)) + } } /// Line iterator for Spans, created by [`Span::lines()`]. @@ -325,11 +350,21 @@ pub struct Lines<'i> { inner: LinesSpan<'i>, } +impl<'i> ExactSizeIterator for Lines<'i> { + fn len(&self) -> usize { + self.inner.len() + } +} + impl<'i> Iterator for Lines<'i> { type Item = &'i str; fn next(&mut self) -> Option { self.inner.next().map(|span| span.as_str()) } + + fn size_hint(&self) -> (usize, Option) { + self.inner.size_hint() + } } #[cfg(test)] @@ -447,4 +482,20 @@ mod tests { lines ); } + + #[test] + fn exact_size_iter_for_lines_span() { + let input = "abc\ndef\nghi"; + let span = Span::new(input, 1, 7).unwrap(); + + let lines_span = span.lines_span(); + let lines_len = lines_span.len(); + let lines_span = lines_span.collect::>(); + assert_eq!(lines_span.len(), lines_len); + + let mut lines_span = span.lines_span(); + let lines_len = lines_span.len(); + let _ = lines_span.next().unwrap(); + assert_eq!(lines_span.len() + 1, lines_len); + } } From 5127e79357743e97eef8489e780e9f2026ed4766 Mon Sep 17 00:00:00 2001 From: MucTepDayH16 Date: Thu, 13 Apr 2023 01:13:54 +0300 Subject: [PATCH 2/4] fmt Signed-off-by: MucTepDayH16 --- pest/src/iterators/flat_pairs.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pest/src/iterators/flat_pairs.rs b/pest/src/iterators/flat_pairs.rs index 7fd94c40..c36c56c8 100644 --- a/pest/src/iterators/flat_pairs.rs +++ b/pest/src/iterators/flat_pairs.rs @@ -243,7 +243,8 @@ mod tests { let pairs = pairs.collect::>(); assert_eq!(pairs.len(), pairs_len); - let pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().flatten().rev(); + let pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().flatten(); + let pairs = pairs.rev(); let pairs_len = pairs.len(); let pairs = pairs.collect::>(); assert_eq!(pairs.len(), pairs_len); From e82aa6f8407203c76d74d27f21d00666279a4b8c Mon Sep 17 00:00:00 2001 From: MucTepDayH16 Date: Thu, 13 Apr 2023 01:25:32 +0300 Subject: [PATCH 3/4] Unit, Style, and Lint Testing Signed-off-by: MucTepDayH16 --- pest/src/iterators/flat_pairs.rs | 10 +++------- pest/src/iterators/pairs.rs | 12 +++--------- pest/src/iterators/tokens.rs | 10 +++------- pest/src/span.rs | 6 ++---- 4 files changed, 11 insertions(+), 27 deletions(-) diff --git a/pest/src/iterators/flat_pairs.rs b/pest/src/iterators/flat_pairs.rs index c36c56c8..6142db3b 100644 --- a/pest/src/iterators/flat_pairs.rs +++ b/pest/src/iterators/flat_pairs.rs @@ -239,19 +239,15 @@ mod tests { #[test] fn exact_size_iter_for_pairs() { let pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().flatten(); - let pairs_len = pairs.len(); - let pairs = pairs.collect::>(); - assert_eq!(pairs.len(), pairs_len); + assert_eq!(pairs.len(), pairs.count()); let pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().flatten(); let pairs = pairs.rev(); - let pairs_len = pairs.len(); - let pairs = pairs.collect::>(); - assert_eq!(pairs.len(), pairs_len); + assert_eq!(pairs.len(), pairs.count()); let mut pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().flatten(); let pairs_len = pairs.len(); let _ = pairs.next().unwrap(); - assert_eq!(pairs.len() + 1, pairs_len); + assert_eq!(pairs.count() + 1, pairs_len); } } diff --git a/pest/src/iterators/pairs.rs b/pest/src/iterators/pairs.rs index bcc6e843..6866cdf6 100644 --- a/pest/src/iterators/pairs.rs +++ b/pest/src/iterators/pairs.rs @@ -349,8 +349,6 @@ impl<'i, R: RuleType> ::serde::Serialize for Pairs<'i, R> { #[cfg(test)] mod tests { - use crate::iterators::Pair; - use super::super::super::macros::tests::*; use super::super::super::Parser; use alloc::borrow::ToOwned; @@ -507,18 +505,14 @@ mod tests { #[test] fn exact_size_iter_for_pairs() { let pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap(); - let pairs_len = pairs.len(); - let pairs = pairs.collect::>>(); - assert_eq!(pairs.len(), pairs_len); + assert_eq!(pairs.len(), pairs.count()); let pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().rev(); - let pairs_len = pairs.len(); - let pairs = pairs.collect::>>(); - assert_eq!(pairs.len(), pairs_len); + assert_eq!(pairs.len(), pairs.count()); let mut pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap(); let pairs_len = pairs.len(); let _ = pairs.next().unwrap(); - assert_eq!(pairs.len() + 1, pairs_len); + assert_eq!(pairs.count() + 1, pairs_len); } } diff --git a/pest/src/iterators/tokens.rs b/pest/src/iterators/tokens.rs index 5be92080..d9b991f1 100644 --- a/pest/src/iterators/tokens.rs +++ b/pest/src/iterators/tokens.rs @@ -158,18 +158,14 @@ mod tests { #[test] fn exact_size_iter_for_tokens() { let tokens = AbcParser::parse(Rule::a, "abcde").unwrap().tokens(); - let tokens_len = tokens.len(); - let tokens = tokens.collect::>>(); - assert_eq!(tokens.len(), tokens_len); + assert_eq!(tokens.len(), tokens.count()); let tokens = AbcParser::parse(Rule::a, "abcde").unwrap().tokens().rev(); - let tokens_len = tokens.len(); - let tokens = tokens.collect::>>(); - assert_eq!(tokens.len(), tokens_len); + assert_eq!(tokens.len(), tokens.count()); let mut tokens = AbcParser::parse(Rule::a, "abcde").unwrap().tokens(); let tokens_len = tokens.len(); let _ = tokens.next().unwrap(); - assert_eq!(tokens.len() + 1, tokens_len); + assert_eq!(tokens.count() + 1, tokens_len); } } diff --git a/pest/src/span.rs b/pest/src/span.rs index fa4487b5..ff526190 100644 --- a/pest/src/span.rs +++ b/pest/src/span.rs @@ -489,13 +489,11 @@ mod tests { let span = Span::new(input, 1, 7).unwrap(); let lines_span = span.lines_span(); - let lines_len = lines_span.len(); - let lines_span = lines_span.collect::>(); - assert_eq!(lines_span.len(), lines_len); + assert_eq!(lines_span.len(), lines_span.count()); let mut lines_span = span.lines_span(); let lines_len = lines_span.len(); let _ = lines_span.next().unwrap(); - assert_eq!(lines_span.len() + 1, lines_len); + assert_eq!(lines_span.count() + 1, lines_len); } } From 5ed9a9759574467d82d30b3293ee228974b89151 Mon Sep 17 00:00:00 2001 From: MucTepDayH16 Date: Mon, 17 Apr 2023 12:29:21 +0300 Subject: [PATCH 4/4] Evaluate iter len in linear time Signed-off-by: MucTepDayH16 --- grammars/benches/json.rs | 20 +++++++++++++ pest/src/iterators/flat_pairs.rs | 16 ++++------- pest/src/iterators/pairs.rs | 30 +++++++++++-------- pest/src/iterators/tokens.rs | 3 ++ pest/src/span.rs | 49 -------------------------------- 5 files changed, 46 insertions(+), 72 deletions(-) diff --git a/grammars/benches/json.rs b/grammars/benches/json.rs index 8723db0c..8afd0bae 100644 --- a/grammars/benches/json.rs +++ b/grammars/benches/json.rs @@ -88,6 +88,10 @@ fn bench_line_col(c: &mut Criterion) { // pairs nested iter (with LineIndex) time: [14.716 µs 14.822 µs 14.964 µs] // pairs flatten iter (v2.5.2) time: [1.1230 µs 1.1309 µs 1.1428 µs] // pairs flatten iter (with LineIndex) time: [5.4637 µs 5.6061 µs 5.7886 µs] +// pairs nested collect (v2.5.7) time: [8.4609 µs 8.4644 µs 8.4680 µs] +// pairs nested collect (ExactSize) time: [7.9492 µs 7.9604 µs 7.9751 µs] +// pairs flatten collect (v2.5.7) time: [11.471 µs 11.475 µs 11.480 µs] +// pairs flatten collect (ExactSize) time: [11.058 µs 11.062 µs 11.066 µs] fn bench_pairs_iter(c: &mut Criterion) { let data = include_str!("data.json"); @@ -111,6 +115,22 @@ fn bench_pairs_iter(c: &mut Criterion) { } }); }); + + c.bench_function("pairs nested collect", |b| { + let pairs = autocorrect::JsonParser::parse(autocorrect::Rule::item, data).unwrap(); + + b.iter(move || { + let _pairs = pairs.clone().collect::>(); + }); + }); + + c.bench_function("pairs flatten collect", |b| { + let pairs = autocorrect::JsonParser::parse(autocorrect::Rule::item, data).unwrap(); + + b.iter(move || { + let _pairs = pairs.clone().flatten().collect::>(); + }); + }); } criterion_group!(benches, bench_json_parse, bench_line_col, bench_pairs_iter); diff --git a/pest/src/iterators/flat_pairs.rs b/pest/src/iterators/flat_pairs.rs index 31b4decb..9b92f557 100644 --- a/pest/src/iterators/flat_pairs.rs +++ b/pest/src/iterators/flat_pairs.rs @@ -104,17 +104,8 @@ impl<'i, R: RuleType> FlatPairs<'i, R> { impl<'i, R: RuleType> ExactSizeIterator for FlatPairs<'i, R> { fn len(&self) -> usize { - let mut start = self.start; - let mut count = 0; - while start < self.end { - start += 1; - while start < self.end && !self.is_start(start) { - start += 1; - } - - count += 1; - } - count + // Tokens len is exactly twice as flatten pairs len + (self.end - self.start) >> 1 } } @@ -241,6 +232,9 @@ mod tests { let pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().flatten(); assert_eq!(pairs.len(), pairs.count()); + let pairs = AbcParser::parse(Rule::a, "我很漂亮efgh").unwrap().flatten(); + assert_eq!(pairs.len(), pairs.count()); + let pairs = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().flatten(); let pairs = pairs.rev(); assert_eq!(pairs.len(), pairs.count()); diff --git a/pest/src/iterators/pairs.rs b/pest/src/iterators/pairs.rs index 3ca57de7..ab7df75e 100644 --- a/pest/src/iterators/pairs.rs +++ b/pest/src/iterators/pairs.rs @@ -38,6 +38,7 @@ pub struct Pairs<'i, R> { input: &'i str, start: usize, end: usize, + pairs_count: usize, line_index: Rc, } @@ -53,11 +54,24 @@ pub fn new<'i, R: RuleType>( None => Rc::new(LineIndex::new(input)), }; + let mut pairs_count = 0; + let mut cursor = start; + while cursor < end { + cursor = match queue[cursor] { + QueueableToken::Start { + end_token_index, .. + } => end_token_index, + _ => unreachable!(), + } + 1; + pairs_count += 1; + } + Pairs { queue, input, start, end, + pairs_count, line_index, } } @@ -347,19 +361,9 @@ impl<'i, R: RuleType> Pairs<'i, R> { } impl<'i, R: RuleType> ExactSizeIterator for Pairs<'i, R> { + #[inline] fn len(&self) -> usize { - let mut start = self.start; - let mut count = 0; - while start < self.end { - start = match self.queue[start] { - QueueableToken::Start { - end_token_index, .. - } => end_token_index + 1, - _ => unreachable!(), - }; - count += 1; - } - count + self.pairs_count } } @@ -370,6 +374,7 @@ impl<'i, R: RuleType> Iterator for Pairs<'i, R> { let pair = self.peek()?; self.start = self.pair() + 1; + self.pairs_count -= 1; Some(pair) } @@ -386,6 +391,7 @@ impl<'i, R: RuleType> DoubleEndedIterator for Pairs<'i, R> { } self.end = self.pair_from_end(); + self.pairs_count -= 1; let pair = unsafe { pair::new( diff --git a/pest/src/iterators/tokens.rs b/pest/src/iterators/tokens.rs index 588a913e..41cbc472 100644 --- a/pest/src/iterators/tokens.rs +++ b/pest/src/iterators/tokens.rs @@ -160,6 +160,9 @@ mod tests { let tokens = AbcParser::parse(Rule::a, "abcde").unwrap().tokens(); assert_eq!(tokens.len(), tokens.count()); + let tokens = AbcParser::parse(Rule::a, "我很漂亮e").unwrap().tokens(); + assert_eq!(tokens.len(), tokens.count()); + let tokens = AbcParser::parse(Rule::a, "abcde").unwrap().tokens().rev(); assert_eq!(tokens.len(), tokens.count()); diff --git a/pest/src/span.rs b/pest/src/span.rs index ff526190..7603c43c 100644 --- a/pest/src/span.rs +++ b/pest/src/span.rs @@ -298,26 +298,6 @@ pub struct LinesSpan<'i> { pos: usize, } -impl<'i> ExactSizeIterator for LinesSpan<'i> { - fn len(&self) -> usize { - let mut self_pos = self.pos; - let mut count = 0; - while self_pos < self.span.end { - let pos = match position::Position::new(self.span.input, self_pos) { - Some(pos) => pos, - None => break, - }; - if pos.at_end() { - break; - } - - self_pos = pos.find_line_end(); - count += 1; - } - count - } -} - impl<'i> Iterator for LinesSpan<'i> { type Item = Span<'i>; fn next(&mut self) -> Option { @@ -334,11 +314,6 @@ impl<'i> Iterator for LinesSpan<'i> { Span::new(self.span.input, line_start, self.pos) } - - fn size_hint(&self) -> (usize, Option) { - let len = ::len(self); - (len, Some(len)) - } } /// Line iterator for Spans, created by [`Span::lines()`]. @@ -350,21 +325,11 @@ pub struct Lines<'i> { inner: LinesSpan<'i>, } -impl<'i> ExactSizeIterator for Lines<'i> { - fn len(&self) -> usize { - self.inner.len() - } -} - impl<'i> Iterator for Lines<'i> { type Item = &'i str; fn next(&mut self) -> Option { self.inner.next().map(|span| span.as_str()) } - - fn size_hint(&self) -> (usize, Option) { - self.inner.size_hint() - } } #[cfg(test)] @@ -482,18 +447,4 @@ mod tests { lines ); } - - #[test] - fn exact_size_iter_for_lines_span() { - let input = "abc\ndef\nghi"; - let span = Span::new(input, 1, 7).unwrap(); - - let lines_span = span.lines_span(); - assert_eq!(lines_span.len(), lines_span.count()); - - let mut lines_span = span.lines_span(); - let lines_len = lines_span.len(); - let _ = lines_span.next().unwrap(); - assert_eq!(lines_span.count() + 1, lines_len); - } }