Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement ExactSizeIterator for Pair iterators #833

Merged
merged 5 commits into from
Apr 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions grammars/benches/json.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ fn bench_line_col(c: &mut Criterion) {
// pairs nested iter (with LineIndex) time: [14.716 µs 14.822 µs 14.964 µs]
// pairs flatten iter (v2.5.2) time: [1.1230 µs 1.1309 µs 1.1428 µs]
// pairs flatten iter (with LineIndex) time: [5.4637 µs 5.6061 µs 5.7886 µs]
// pairs nested collect (v2.5.7) time: [8.4609 µs 8.4644 µs 8.4680 µs]
// pairs nested collect (ExactSize) time: [7.9492 µs 7.9604 µs 7.9751 µs]
// pairs flatten collect (v2.5.7) time: [11.471 µs 11.475 µs 11.480 µs]
// pairs flatten collect (ExactSize) time: [11.058 µs 11.062 µs 11.066 µs]
fn bench_pairs_iter(c: &mut Criterion) {
let data = include_str!("data.json");

Expand All @@ -111,6 +115,22 @@ fn bench_pairs_iter(c: &mut Criterion) {
}
});
});

// Benchmark: clone the already-parsed top-level `Pairs` and collect it into a `Vec`.
c.bench_function("pairs nested collect", |b| {
    // Parse once, outside the timed closure, so only clone + collect is measured.
    let pairs = autocorrect::JsonParser::parse(autocorrect::Rule::item, data).unwrap();

    // Return the collected Vec instead of binding and discarding it: Criterion's
    // `iter` feeds the routine's return value through `black_box`, which keeps
    // the optimizer from eliding the work being measured.
    b.iter(move || pairs.clone().collect::<Vec<_>>());
});

// Benchmark: clone the already-parsed `Pairs`, flatten it, and collect into a `Vec`.
c.bench_function("pairs flatten collect", |b| {
    // Parse once, outside the timed closure, so only clone + flatten + collect is measured.
    let pairs = autocorrect::JsonParser::parse(autocorrect::Rule::item, data).unwrap();

    // Return the collected Vec instead of binding and discarding it: Criterion's
    // `iter` feeds the routine's return value through `black_box`, which keeps
    // the optimizer from eliding the work being measured.
    b.iter(move || pairs.clone().flatten().collect::<Vec<_>>());
});
}

criterion_group!(benches, bench_json_parse, bench_line_col, bench_pairs_iter);
Expand Down
30 changes: 30 additions & 0 deletions pest/src/iterators/flat_pairs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,13 @@ impl<'i, R: RuleType> FlatPairs<'i, R> {
}
}

impl<'i, R: RuleType> ExactSizeIterator for FlatPairs<'i, R> {
    /// Returns half the width of the `[start, end)` token window as the
    /// number of flattened pairs.
    fn len(&self) -> usize {
        // Each pair is represented by two tokens, so the pair count is taken
        // to be half the token count.
        // NOTE(review): halving is exact only while `[start, end)` holds as
        // many end tokens as start tokens. Confirm this still holds after
        // `next`/`next_back` have moved a bound inside a nested pair.
        let token_span = self.end - self.start;
        token_span / 2
    }
}

impl<'i, R: RuleType> Iterator for FlatPairs<'i, R> {
type Item = Pair<'i, R>;

Expand All @@ -122,6 +129,11 @@ impl<'i, R: RuleType> Iterator for FlatPairs<'i, R> {

Some(pair)
}

fn size_hint(&self) -> (usize, Option<usize>) {
let len = <Self as ExactSizeIterator>::len(self);
(len, Some(len))
}
}

impl<'i, R: RuleType> DoubleEndedIterator for FlatPairs<'i, R> {
Expand Down Expand Up @@ -214,4 +226,22 @@ mod tests {
assert_eq!(pair.line_col(), (1, 5));
assert_eq!(pair.line_col(), pair.as_span().start_pos().line_col());
}

#[test]
fn exact_size_iter_for_pairs() {
    // Fresh iterator over ASCII input: `len` must equal the number of pairs yielded.
    let flat = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().flatten();
    let reported = flat.len();
    assert_eq!(reported, flat.count());

    // Same check with multi-byte UTF-8 input.
    let flat = AbcParser::parse(Rule::a, "我很漂亮efgh").unwrap().flatten();
    let reported = flat.len();
    assert_eq!(reported, flat.count());

    // Same check through the reversed adaptor.
    let reversed = AbcParser::parse(Rule::a, "abc\nefgh")
        .unwrap()
        .flatten()
        .rev();
    let reported = reversed.len();
    assert_eq!(reported, reversed.count());

    // After taking one pair, the rest must number one fewer than the initial `len`.
    let mut flat = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().flatten();
    let initial_len = flat.len();
    let _ = flat.next().unwrap();
    assert_eq!(flat.count() + 1, initial_len);
}
}
42 changes: 42 additions & 0 deletions pest/src/iterators/pairs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ pub struct Pairs<'i, R> {
input: &'i str,
start: usize,
end: usize,
pairs_count: usize,
line_index: Rc<LineIndex>,
}

Expand All @@ -53,11 +54,24 @@ pub fn new<'i, R: RuleType>(
None => Rc::new(LineIndex::new(input)),
};

// Count the pairs in `[start, end)` up front. Each step reads the `Start`
// token under the cursor and jumps to the index just past its matching end
// token, so pairs nested inside it are skipped rather than counted.
let mut pairs_count = 0;
let mut cursor = start;
while cursor < end {
    let pair_end = match queue[cursor] {
        QueueableToken::Start {
            end_token_index, ..
        } => end_token_index,
        // The cursor is expected to land on a `Start` token every time.
        _ => unreachable!(),
    };
    cursor = pair_end + 1;
    pairs_count += 1;
}

Pairs {
queue,
input,
start,
end,
pairs_count,
line_index,
}
}
Expand Down Expand Up @@ -346,15 +360,28 @@ impl<'i, R: RuleType> Pairs<'i, R> {
}
}

impl<'i, R: RuleType> ExactSizeIterator for Pairs<'i, R> {
/// Returns the stored `pairs_count` field; the token queue is not scanned here.
#[inline]
fn len(&self) -> usize {
self.pairs_count
}
}

impl<'i, R: RuleType> Iterator for Pairs<'i, R> {
    type Item = Pair<'i, R>;

    /// Yields the pair reported by `peek`, or `None` when `peek` has nothing.
    ///
    /// On success `start` moves to one past the index returned by
    /// `self.pair()`, and `pairs_count` drops by one.
    fn next(&mut self) -> Option<Self::Item> {
        let current = self.peek()?;

        let next_start = self.pair() + 1;
        self.start = next_start;
        self.pairs_count -= 1;

        Some(current)
    }

    /// Reports an exact size hint: the lower and upper bounds are both the
    /// value returned by this iterator's `ExactSizeIterator::len`.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = ExactSizeIterator::len(self);
        (remaining, Some(remaining))
    }
}

impl<'i, R: RuleType> DoubleEndedIterator for Pairs<'i, R> {
Expand All @@ -364,6 +391,7 @@ impl<'i, R: RuleType> DoubleEndedIterator for Pairs<'i, R> {
}

self.end = self.pair_from_end();
self.pairs_count -= 1;

let pair = unsafe {
pair::new(
Expand Down Expand Up @@ -640,4 +668,18 @@ mod tests {
assert_eq!(right_numbers.next().unwrap().as_str(), "2");
assert_eq!(right_numbers.next(), None);
}

#[test]
fn exact_size_iter_for_pairs() {
    // Fresh iterator: `len` must equal the number of pairs yielded.
    let top_level = AbcParser::parse(Rule::a, "abc\nefgh").unwrap();
    let reported = top_level.len();
    assert_eq!(reported, top_level.count());

    // Same check through the reversed adaptor.
    let reversed = AbcParser::parse(Rule::a, "abc\nefgh").unwrap().rev();
    let reported = reversed.len();
    assert_eq!(reported, reversed.count());

    // After taking one pair, the rest must number one fewer than the initial `len`.
    let mut top_level = AbcParser::parse(Rule::a, "abc\nefgh").unwrap();
    let initial_len = top_level.len();
    let _ = top_level.next().unwrap();
    assert_eq!(top_level.count() + 1, initial_len);
}
}
28 changes: 28 additions & 0 deletions pest/src/iterators/tokens.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,12 @@ impl<'i, R: RuleType> Tokens<'i, R> {
}
}

impl<'i, R: RuleType> ExactSizeIterator for Tokens<'i, R> {
/// Returns the difference between this iterator's `end` and `start` indices.
fn len(&self) -> usize {
self.end - self.start
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

likely ok, but perhaps good to check / add a test that this works with non-ASCII UTF-8 characters

}
}

impl<'i, R: RuleType> Iterator for Tokens<'i, R> {
type Item = Token<'i, R>;

Expand All @@ -106,6 +112,11 @@ impl<'i, R: RuleType> Iterator for Tokens<'i, R> {

Some(token)
}

fn size_hint(&self) -> (usize, Option<usize>) {
let len = <Self as ExactSizeIterator>::len(self);
(len, Some(len))
}
}

impl<'i, R: RuleType> DoubleEndedIterator for Tokens<'i, R> {
Expand Down Expand Up @@ -143,4 +154,21 @@ mod tests {
let reverse_tokens = pairs.tokens().rev().collect::<Vec<Token<'_, Rule>>>();
assert_eq!(tokens, reverse_tokens);
}

#[test]
fn exact_size_iter_for_tokens() {
    // Fresh iterator over ASCII input: `len` must equal the number of tokens yielded.
    let stream = AbcParser::parse(Rule::a, "abcde").unwrap().tokens();
    let reported = stream.len();
    assert_eq!(reported, stream.count());

    // Same check with multi-byte UTF-8 input.
    let stream = AbcParser::parse(Rule::a, "我很漂亮e").unwrap().tokens();
    let reported = stream.len();
    assert_eq!(reported, stream.count());

    // Same check through the reversed adaptor.
    let reversed = AbcParser::parse(Rule::a, "abcde").unwrap().tokens().rev();
    let reported = reversed.len();
    assert_eq!(reported, reversed.count());

    // After taking one token, the rest must number one fewer than the initial `len`.
    let mut stream = AbcParser::parse(Rule::a, "abcde").unwrap().tokens();
    let initial_len = stream.len();
    let _ = stream.next().unwrap();
    assert_eq!(stream.count() + 1, initial_len);
}
}