From 7a6c400b3fc0e3ea5846d464b8d4b957254bd194 Mon Sep 17 00:00:00 2001 From: Mark Rousskov Date: Sat, 6 Jan 2024 12:44:40 -0500 Subject: [PATCH] Update to regex-automata v0.4 --- .github/workflows/ci.yml | 7 +- Cargo.toml | 7 +- src/lib.rs | 151 ++++++++++++++------------------------- 3 files changed, 63 insertions(+), 102 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2878a82..3236ff3 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -42,7 +42,12 @@ jobs: - name: Run tests uses: actions-rs/cargo@v1 with: - command: build + command: test + - name: Run tests (unicode) + uses: actions-rs/cargo@v1 + with: + command: test + args: --features unicode clippy_check: runs-on: ubuntu-latest diff --git a/Cargo.toml b/Cargo.toml index 99f0a41..3babdf7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "matchers" -version = "0.1.0" +version = "0.2.0" authors = ["Eliza Weisman "] edition = "2018" license = "MIT" @@ -18,4 +18,7 @@ keywords = ["regex", "match", "pattern", "streaming"] maintenance = { status = "experimental" } [dependencies] -regex-automata = "0.1" +regex-automata = { version = "0.4", default-features = false, features = ["syntax", "dfa-build", "dfa-search"] } + +[features] +unicode = ["regex-automata/unicode"] diff --git a/src/lib.rs b/src/lib.rs index 2720a1a..fd05e07 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -24,38 +24,33 @@ //! //! [`regex`]: https://crates.io/crates/regex //! [`regex-automata`]: https://crates.io/crates/regex-automata -//! [syntax]: https://docs.rs/regex-automata/0.1.7/regex_automata/#syntax +//! [syntax]: https://docs.rs/regex-automata/0.4.3/regex_automata/#syntax -use regex_automata::{dense, DenseDFA, SparseDFA, StateID, DFA}; -use std::{fmt, io, marker::PhantomData, str::FromStr}; +use std::{fmt, io, str::FromStr}; -pub use regex_automata::Error; +pub use regex_automata::dfa::dense::BuildError; +use regex_automata::dfa::dense::DFA; +use regex_automata::dfa::Automaton; +use regex_automata::util::primitives::StateID; +use regex_automata::Anchored; /// A compiled match pattern that can match multipe inputs, or return a /// [`Matcher`] that matches a single input. /// /// [`Matcher`]: ../struct.Matcher.html #[derive(Debug, Clone)] -pub struct Pattern, S>> -where - S: StateID, - A: DFA, -{ +pub struct Pattern>> { automaton: A, + anchored: Anchored, } /// A reference to a [`Pattern`] that matches a single input. /// /// [`Pattern`]: ../struct.Pattern.html #[derive(Debug, Clone)] -pub struct Matcher<'a, S = usize, A = DenseDFA<&'a [S], S>> -where - S: StateID, - A: DFA, -{ +pub struct Matcher>> { automaton: A, - state: S, - _lt: PhantomData<&'a ()>, + state: StateID, } // === impl Pattern === @@ -86,9 +81,12 @@ impl Pattern { /// // sequence when it's followed by non-matching characters: /// assert!(pattern.display_matches(&"hello world! aaaaab")); /// ``` - pub fn new(pattern: &str) -> Result { - let automaton = DenseDFA::new(pattern)?; - Ok(Pattern { automaton }) + pub fn new(pattern: &str) -> Result { + let automaton = DFA::new(pattern)?; + Ok(Pattern { + automaton, + anchored: Anchored::No, + }) } /// Returns a new `Pattern` anchored at the beginning of the input stream, @@ -120,25 +118,36 @@ impl Pattern { /// .expect("regex is not invalid"); /// assert!(pattern2.display_matches(&"hello world! aaaaab")); /// ``` - pub fn new_anchored(pattern: &str) -> Result { - let automaton = dense::Builder::new().anchored(true).build(pattern)?; - Ok(Pattern { automaton }) + pub fn new_anchored(pattern: &str) -> Result { + let automaton = DFA::new(pattern)?; + Ok(Pattern { + automaton, + anchored: Anchored::Yes, + }) } } impl FromStr for Pattern { - type Err = Error; + type Err = BuildError; fn from_str(s: &str) -> Result { Self::new(s) } } -impl Pattern -where - S: StateID, - A: DFA, - Self: for<'a> ToMatcher<'a, S>, -{ +impl Pattern { + /// Obtains a `matcher` for this pattern. + /// + /// This conversion is useful when wanting to incrementally feed input (via + /// `io::Write`/`fmt::Write` to a matcher). Otherwise, the convenience methods on Pattern + /// suffice. + pub fn matcher(&self) -> Matcher<&'_ A> { + let config = regex_automata::util::start::Config::new().anchored(self.anchored); + Matcher { + automaton: &self.automaton, + state: self.automaton.start_state(&config).unwrap(), + } + } + /// Returns `true` if this pattern matches the given string. #[inline] pub fn matches(&self, s: &impl AsRef) -> bool { @@ -220,35 +229,24 @@ where // === impl Matcher === -impl<'a, S, A> Matcher<'a, S, A> +impl Matcher where - S: StateID, - A: DFA, + A: Automaton, { - fn new(automaton: A) -> Self { - let state = automaton.start_state(); - Self { - automaton, - state, - _lt: PhantomData, - } - } - #[inline] fn advance(&mut self, input: u8) { - self.state = unsafe { - // It's safe to call `next_state_unchecked` since the matcher may - // only be constructed by a `Pattern`, which, in turn,can only be - // constructed with a valid DFA. - self.automaton.next_state_unchecked(self.state, input) - }; + // It's safe to call `next_state_unchecked` since the matcher may + // only be constructed by a `Pattern`, which, in turn, can only be + // constructed with a valid DFA. + self.state = unsafe { self.automaton.next_state_unchecked(self.state, input) }; } /// Returns `true` if this `Matcher` has matched any input that has been /// provided. #[inline] pub fn is_matched(&self) -> bool { - self.automaton.is_match_state(self.state) + let eoi_state = self.automaton.next_eoi_state(self.state); + self.automaton.is_match_state(eoi_state) } /// Returns `true` if this pattern matches the formatted output of the given @@ -293,11 +291,7 @@ where } } -impl<'a, S, A> fmt::Write for Matcher<'a, S, A> -where - S: StateID, - A: DFA, -{ +impl fmt::Write for Matcher { fn write_str(&mut self, s: &str) -> fmt::Result { for &byte in s.as_bytes() { self.advance(byte); @@ -309,11 +303,7 @@ where } } -impl<'a, S, A> io::Write for Matcher<'a, S, A> -where - S: StateID, - A: DFA, -{ +impl io::Write for Matcher { fn write(&mut self, bytes: &[u8]) -> Result { let mut i = 0; for &byte in bytes { @@ -331,43 +321,6 @@ where } } -pub trait ToMatcher<'a, S> -where - Self: crate::sealed::Sealed, - S: StateID + 'a, -{ - type Automaton: DFA; - fn matcher(&'a self) -> Matcher<'a, S, Self::Automaton>; -} - -impl crate::sealed::Sealed for Pattern, S>> where S: StateID {} - -impl<'a, S> ToMatcher<'a, S> for Pattern, S>> -where - S: StateID + 'a, -{ - type Automaton = DenseDFA<&'a [S], S>; - fn matcher(&'a self) -> Matcher<'a, S, Self::Automaton> { - Matcher::new(self.automaton.as_ref()) - } -} - -impl<'a, S> ToMatcher<'a, S> for Pattern, S>> -where - S: StateID + 'a, -{ - type Automaton = SparseDFA<&'a [u8], S>; - fn matcher(&'a self) -> Matcher<'a, S, Self::Automaton> { - Matcher::new(self.automaton.as_ref()) - } -} - -impl crate::sealed::Sealed for Pattern, S>> where S: StateID {} - -mod sealed { - pub trait Sealed {} -} - #[cfg(test)] mod test { use super::*; @@ -409,7 +362,7 @@ mod test { } } - fn test_debug_matches(new_pattern: impl Fn(&str) -> Result) { + fn test_debug_matches(new_pattern: impl Fn(&str) -> Result) { let pat = new_pattern("hello world").unwrap(); assert!(pat.debug_matches(&Str::hello_world())); @@ -420,7 +373,7 @@ mod test { assert_eq!(pat.debug_matches(&Str::hello_world()), false); } - fn test_display_matches(new_pattern: impl Fn(&str) -> Result) { + fn test_display_matches(new_pattern: impl Fn(&str) -> Result) { let pat = new_pattern("hello world").unwrap(); assert!(pat.display_matches(&Str::hello_world())); @@ -431,7 +384,7 @@ mod test { assert_eq!(pat.display_matches(&Str::hello_world()), false); } - fn test_reader_matches(new_pattern: impl Fn(&str) -> Result) { + fn test_reader_matches(new_pattern: impl Fn(&str) -> Result) { let pat = new_pattern("hello world").unwrap(); assert!(pat .read_matches(Str::hello_world().to_reader()) @@ -450,7 +403,7 @@ mod test { ); } - fn test_debug_rep_patterns(new_pattern: impl Fn(&str) -> Result) { + fn test_debug_rep_patterns(new_pattern: impl Fn(&str) -> Result) { let pat = new_pattern("a+b").unwrap(); assert!(pat.debug_matches(&Str::new("ab"))); assert!(pat.debug_matches(&Str::new("aaaab")));