From 9c9e3c16da3eaac129651ecf662dc858993a639a Mon Sep 17 00:00:00 2001
From: Guilhem Vallat
Date: Mon, 31 Oct 2022 15:05:46 +0100
Subject: [PATCH] Add `UniCase::contains()`

---
 src/lib.rs         | 21 +++++++++++++++
 src/unicode/mod.rs | 63 ++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 84 insertions(+)

diff --git a/src/lib.rs b/src/lib.rs
index 0c02b5e..a2f3307 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -194,6 +194,27 @@ impl<S> UniCase<S> {
     }
 }
 
+impl<S: AsRef<str>> UniCase<S> {
+    /// Returns true if the given pattern matches a sub-slice of this string slice, using the
+    /// same `lookup`-mapped comparison as `Unicode::contains` for every pair of encodings.
+    ///
+    /// Returns false if it does not.
+    #[inline]
+    pub fn contains<S2: AsRef<str>>(&self, pat: &UniCase<S2>) -> bool {
+        match (&self.0, &pat.0) {
+            // `str::contains` is case-sensitive, so two ASCII operands are searched through
+            // `Unicode::contains` exactly like the mixed-encoding arms below.
+            (&Encoding::Ascii(ref x), &Encoding::Ascii(ref p)) => {
+                Unicode(x.as_ref()).contains(&Unicode(p.as_ref()))
+            }
+            (&Encoding::Unicode(ref x), &Encoding::Unicode(ref p)) => x.contains(p),
+            (&Encoding::Ascii(ref x), &Encoding::Unicode(ref p)) => Unicode(x.as_ref()).contains(p),
+            (&Encoding::Unicode(ref x), &Encoding::Ascii(ref p)) => {
+                x.contains(&Unicode(p.as_ref()))
+            }
+        }
+    }
+}
+
 impl<S> Deref for UniCase<S> {
     type Target = S;
     #[inline]
diff --git a/src/unicode/mod.rs b/src/unicode/mod.rs
index 8b88733..385b335 100644
--- a/src/unicode/mod.rs
+++ b/src/unicode/mod.rs
@@ -33,6 +33,54 @@ impl<S1: AsRef<str>, S2: AsRef<str>> PartialEq<Unicode<S2>> for Unicode<S1> {
     }
 }
 
+impl<S: AsRef<str>> Unicode<S> {
+    /// Returns true if the given pattern matches a sub-slice of this string slice, after both
+    /// have been mapped char-by-char through `lookup`.
+    ///
+    /// Returns false if it does not. An empty pattern always matches.
+    #[inline]
+    pub fn contains<S2: AsRef<str>>(&self, pat: &Unicode<S2>) -> bool {
+        let mut left = self.0.as_ref().chars().flat_map(lookup);
+        let mut pat = pat.0.as_ref().chars().flat_map(lookup);
+
+        match pat.next() {
+            // Scan for the first mapped char of the pattern, then verify the remainder.
+            Some(p0) => 'out: loop {
+                match left.next() {
+                    Some(e) if e == p0 => {
+                        // Verify on clones so that a failed attempt resumes the outer scan
+                        // right after this candidate start.
+                        let mut left = left.clone();
+                        let mut pat = pat.clone();
+
+                        loop {
+                            let p = match pat.next() {
+                                None => break 'out true,
+                                Some(p) => p,
+                            };
+
+                            let e = match left.next() {
+                                // Haystack exhausted mid-match: no later start can fit either.
+                                None => break 'out false,
+                                Some(e) => e,
+                            };
+
+                            if e != p {
+                                break;
+                            }
+                        }
+                    }
+                    Some(_) => {
+                        continue;
+                    }
+                    None => break false,
+                }
+            },
+            None => true,
+        }
+    }
+}
+
 impl<S: AsRef<str>> Eq for Unicode<S> {}
 
 #[cfg(__unicase__iter_cmp)]
@@ -198,4 +246,19 @@ mod tests {
         b.bytes = "στιγμας".len() as u64;
         b.iter(|| eq!("στιγμας", "στιγμασ"));
     }
+
+    #[test]
+    fn test_contains() {
+        assert!(Unicode("A").contains(&Unicode("a")));
+        assert!(Unicode("AA").contains(&Unicode("a")));
+        assert!(Unicode("AAA").contains(&Unicode("aa")));
+        assert!(Unicode("BA").contains(&Unicode("a")));
+        assert!(Unicode("AB").contains(&Unicode("a")));
+        assert!(Unicode("BABABB").contains(&Unicode("babb")));
+
+        assert!(!Unicode("B").contains(&Unicode("a")));
+        assert!(!Unicode("BA").contains(&Unicode("aa")));
+        assert!(!Unicode("").contains(&Unicode("a")));
+        assert!(!Unicode("BABABA").contains(&Unicode("babb")));
+    }
 }