From a82a96b83a6f6c17f259a53e460b391a40a6f12f Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Thu, 4 Aug 2016 23:33:02 -0400 Subject: [PATCH] Remove the submatch iterators. All use cases can be replaced with Regex::capture_names. --- src/lib.rs | 6 ++-- src/re_bytes.rs | 71 ++--------------------------------------- src/re_unicode.rs | 68 ++-------------------------------------- tests/api.rs | 80 ----------------------------------------------- tests/macros.rs | 5 ++- 5 files changed, 11 insertions(+), 219 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 2538c0da9d..7a649704f0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -460,9 +460,9 @@ extern crate utf8_ranges; pub use error::Error; pub use re_builder::unicode::*; pub use re_set::unicode::*; -pub use re_trait::{Locations, SubCapturesPosIter}; +pub use re_trait::Locations; pub use re_unicode::{ - Regex, Captures, SubCapturesIter, SubCapturesNamedIter, + Regex, Captures, CaptureNamesIter, CapturesIter, FindIter, Replacer, NoExpand, SplitsIter, SplitsNIter, quote, @@ -558,7 +558,7 @@ pub mod bytes { pub use re_builder::bytes::*; pub use re_bytes::*; pub use re_set::bytes::*; - pub use re_trait::{Locations, SubCapturesPosIter}; + pub use re_trait::Locations; } mod backtrack; diff --git a/src/re_bytes.rs b/src/re_bytes.rs index e752e4ba29..205f59870c 100644 --- a/src/re_bytes.rs +++ b/src/re_bytes.rs @@ -10,7 +10,6 @@ use std::borrow::Cow; use std::collections::HashMap; -use std::collections::hash_map; use std::fmt; use std::ops::Index; use std::str::FromStr; @@ -22,7 +21,7 @@ use exec::{Exec, ExecNoSync}; use expand::expand_bytes; use error::Error; use re_builder::bytes::RegexBuilder; -use re_trait::{self, RegularExpression, Locations, SubCapturesPosIter}; +use re_trait::{self, RegularExpression, Locations}; /// Match represents a single match of a regex in a haystack. 
/// @@ -790,29 +789,6 @@ impl<'t> Captures<'t> { self.named_groups.get(name).and_then(|&i| self.get(i)) } - /// Creates an iterator of all the capture groups in order of appearance - /// in the regular expression. - pub fn iter<'c>(&'c self) -> SubCapturesIter<'c, 't> { - SubCapturesIter { idx: 0, caps: self } - } - - /// Creates an iterator of all the capture group positions in order of - /// appearance in the regular expression. Positions are byte indices - /// in terms of the original string matched. - pub fn iter_pos(&self) -> SubCapturesPosIter { - self.locs.iter() - } - - /// Creates an iterator of all named groups as an tuple with the group - /// name and the value. The iterator returns these values in arbitrary - /// order. - pub fn iter_named<'c>(&'c self) -> SubCapturesNamedIter<'c, 't> { - SubCapturesNamedIter { - caps: self, - names: self.named_groups.iter() - } - } - /// Expands all instances of `$name` in `text` to the corresponding capture /// group `name`, and writes them to the `dst` buffer given. /// @@ -873,7 +849,7 @@ impl<'c, 't> fmt::Debug for CapturesDebug<'c, 't> { let slot_to_name: HashMap<&usize, &String> = self.0.named_groups.iter().map(|(a, b)| (b, a)).collect(); let mut map = f.debug_map(); - for (slot, m) in self.0.iter_pos().enumerate() { + for (slot, m) in self.0.locs.iter().enumerate() { let m = m.map(|(s, e)| escape_bytes(&self.0.text[s..e])); if let Some(ref name) = slot_to_name.get(&slot) { map.entry(&name, &m); @@ -926,49 +902,6 @@ impl<'t, 'i> Index<&'i str> for Captures<'t> { } } -/// An iterator over capture groups for a particular match of a regular -/// expression. -/// -/// `'c` is the lifetime of the captures and `'t` is the lifetime of the -/// matched text. 
-pub struct SubCapturesIter<'c, 't: 'c> { - idx: usize, - caps: &'c Captures<'t>, -} - -impl<'c, 't> Iterator for SubCapturesIter<'c, 't> { - type Item = Option<&'t [u8]>; - - fn next(&mut self) -> Option<Option<&'t [u8]>> { - if self.idx < self.caps.len() { - self.idx += 1; - Some(self.caps.get(self.idx - 1).map(|m| m.as_bytes())) - } else { - None - } - } -} - -/// An Iterator over named capture groups as a tuple with the group name and -/// the value. -/// -/// `'c` is the lifetime of the captures and `'t` is the lifetime of the -/// matched text. -pub struct SubCapturesNamedIter<'c, 't: 'c> { - caps: &'c Captures<'t>, - names: hash_map::Iter<'c, String, usize>, -} - -impl<'c, 't> Iterator for SubCapturesNamedIter<'c, 't> { - type Item = (&'c str, Option<&'t [u8]>); - - fn next(&mut self) -> Option<(&'c str, Option<&'t [u8]>)> { - self.names.next().map(|(name, &pos)| { - (&**name, self.caps.get(pos).map(|m| m.as_bytes())) - }) - } -} - /// Replacer describes types that can be used to replace matches in a byte /// string. /// diff --git a/src/re_unicode.rs b/src/re_unicode.rs index a0cd4ce32c..7e712d26a2 100644 --- a/src/re_unicode.rs +++ b/src/re_unicode.rs @@ -23,7 +23,7 @@ use exec::{Exec, ExecNoSyncStr}; use expand::expand_str; use re_builder::unicode::RegexBuilder; use re_plugin::Plugin; -use re_trait::{self, RegularExpression, Locations, SubCapturesPosIter}; +use re_trait::{self, RegularExpression, Locations}; /// Escapes all regular expression meta characters in `text`. /// @@ -927,29 +927,6 @@ impl<'t> Captures<'t> { self.named_groups.pos(name).and_then(|i| self.get(i)) } - /// Creates an iterator of all the capture groups in order of appearance - /// in the regular expression. - pub fn iter<'c>(&'c self) -> SubCapturesIter<'c, 't> { - SubCapturesIter { idx: 0, caps: self, } - } - - /// Creates an iterator of all the capture group positions in order of - /// appearance in the regular expression. Positions are byte indices - /// in terms of the original string matched. 
- pub fn iter_pos(&self) -> SubCapturesPosIter { - self.locs.iter() - } - - /// Creates an iterator of all named groups as an tuple with the group - /// name and the value. The iterator returns these values in arbitrary - /// order. - pub fn iter_named<'c>(&'c self) -> SubCapturesNamedIter<'c, 't> { - SubCapturesNamedIter { - caps: self, - names: self.named_groups.iter() - } - } - /// Expands all instances of `$name` in `text` to the corresponding capture /// group `name`, and writes them to the `dst` buffer given. /// @@ -995,7 +972,7 @@ impl<'c, 't> fmt::Debug for CapturesDebug<'c, 't> { let slot_to_name: HashMap<usize, &str> = self.0.named_groups.iter().map(|(a, b)| (b, a)).collect(); let mut map = f.debug_map(); - for (slot, m) in self.0.iter_pos().enumerate() { + for (slot, m) in self.0.locs.iter().enumerate() { let m = m.map(|(s, e)| &self.0.text[s..e]); if let Some(ref name) = slot_to_name.get(&slot) { map.entry(&name, &m); @@ -1048,47 +1025,6 @@ impl<'t, 'i> Index<&'i str> for Captures<'t> { } } -/// An iterator over capture groups for a particular match of a regular -/// expression. -/// -/// `'c` is the lifetime of the captures. -pub struct SubCapturesIter<'c, 't: 'c> { - idx: usize, - caps: &'c Captures<'t>, -} - -impl<'c, 't> Iterator for SubCapturesIter<'c, 't> { - type Item = Option<&'t str>; - - fn next(&mut self) -> Option<Option<&'t str>> { - if self.idx < self.caps.len() { - self.idx += 1; - Some(self.caps.get(self.idx - 1).map(|m| m.as_str())) - } else { - None - } - } -} - -/// An Iterator over named capture groups as a tuple with the group -/// name and the value. -/// -/// `'c` is the lifetime of the captures. 
-pub struct SubCapturesNamedIter<'c, 't: 'c> { - caps: &'c Captures<'t>, - names: NamedGroupsIter<'c>, -} - -impl<'c, 't> Iterator for SubCapturesNamedIter<'c, 't> { - type Item = (&'c str, Option<&'t str>); - - fn next(&mut self) -> Option<(&'c str, Option<&'t str>)> { - self.names.next().map(|(name, pos)| { - (name, self.caps.get(pos).map(|m| m.as_str())) - }) - } -} - /// An iterator that yields all non-overlapping capture groups matching a /// particular regular expression. /// diff --git a/tests/api.rs b/tests/api.rs index afc6b16600..7221995b0e 100644 --- a/tests/api.rs +++ b/tests/api.rs @@ -140,86 +140,6 @@ fn capture_misc() { assert_eq!(t!("c"), match_text!(cap.name("b").unwrap())); } -#[test] -fn capture_iter() { - let re = regex!(r"(.)(?P<a>.)(.)(?P<b>.)"); - let cap = re.captures(t!("abcd")).unwrap(); - assert_eq!(5, cap.len()); - - let expected = vec![ - t!("abcd"), t!("a"), t!("b"), t!("c"), t!("d"), - ].into_iter().map(Some).collect::<Vec<_>>(); - let got = cap.iter().collect::<Vec<_>>(); - assert_eq!(expected, got); -} - -#[test] -fn capture_iter_missing() { - let re = regex!(r"(.)(?P<a>a)?(.)(?P<b>.)"); - let cap = re.captures(t!("abc")).unwrap(); - assert_eq!(5, cap.len()); - - let expected = vec![ - Some(t!("abc")), Some(t!("a")), None, Some(t!("b")), Some(t!("c")), - ]; - let got = cap.iter().collect::<Vec<_>>(); - assert_eq!(expected, got); -} - -#[test] -fn capture_iter_pos() { - let re = regex!(r"(.)(?P<a>.)(.)(?P<b>.)"); - let cap = re.captures(t!("abcd")).unwrap(); - - let expected = vec![ - (0, 4), (0, 1), (1, 2), (2, 3), (3, 4), - ].into_iter().map(Some).collect::<Vec<_>>(); - let got = cap.iter_pos().collect::<Vec<_>>(); - assert_eq!(expected, got); -} - -#[test] -fn capture_iter_pos_missing() { - let re = regex!(r"(.)(?P<a>a)?(.)(?P<b>.)"); - let cap = re.captures(t!("abc")).unwrap(); - - let expected = vec![ - Some((0, 3)), Some((0, 1)), None, Some((1, 2)), Some((2, 3)), - ]; - let got = cap.iter_pos().collect::<Vec<_>>(); - assert_eq!(expected, got); -} - -#[test] -fn capture_iter_named() { - let re
= regex!(r"(.)(?P<a>.)(.)(?P<b>.)"); - let cap = re.captures(t!("abcd")).unwrap(); - - let expected1 = vec![ - ("a", Some(t!("b"))), ("b", Some(t!("d"))), - ]; - let expected2 = vec![ - ("b", Some(t!("d"))), ("a", Some(t!("b"))), - ]; - let got = cap.iter_named().collect::<Vec<_>>(); - assert!(got == expected1 || got == expected2); -} - -#[test] -fn capture_iter_named_missing() { - let re = regex!(r"(.)(?P<a>.)?(.)(?P<b>.)"); - let cap = re.captures(t!("abc")).unwrap(); - - let expected1 = vec![ - ("a", None), ("b", Some(t!("c"))), - ]; - let expected2 = vec![ - ("b", Some(t!("c"))), ("a", None), - ]; - let got = cap.iter_named().collect::<Vec<_>>(); - assert!(got == expected1 || got == expected2); -} - expand!(expand1, r"(?P<foo>\w+)", "abc", "$foo", "abc"); expand!(expand2, r"(?P<foo>\w+)", "abc", "$0", "abc"); expand!(expand3, r"(?P<foo>\w+)", "abc", "$1", "abc"); diff --git a/tests/macros.rs b/tests/macros.rs index 9d30dd16e2..5badc89b53 100644 --- a/tests/macros.rs +++ b/tests/macros.rs @@ -20,7 +20,10 @@ macro_rules! mat( Some(c) => { assert!(r.is_match(text)); assert!(r.shortest_match(text).is_some()); - c.iter_pos().collect() + r.capture_names() + .enumerate() + .map(|(i, _)| c.get(i).map(|m| (m.start(), m.end()))) + .collect() } None => vec![None], };