From be9653c97c12d3533a7373ef3d9089f3959bab5c Mon Sep 17 00:00:00 2001
From: Andrew Gallant
Date: Sat, 20 May 2017 12:58:27 -0400
Subject: [PATCH] RegexSet: fix literal optimization bug

When combining multiple regexes in a set where some are anchored and
others aren't, it's possible to wind up in a situation where prefix
scanning is used. This is bad, because it can lead to some of the
anchored regexes matching where they shouldn't be allowed to match.

As a result, we disable all literal optimizations for regex sets if *any*
regex in the set is anchored.

Fixes #358
---
Review note: the end-anchor branch below clears `suffixes` (it previously
cleared `prefixes`, which left suffix literals enabled for end-anchored
patterns in a set). Whitespace in this patch was reconstructed from a
flattened copy; confirm context indentation against the target tree.

 src/exec.rs  | 11 +++++++++++
 tests/set.rs |  1 +
 2 files changed, 12 insertions(+)

diff --git a/src/exec.rs b/src/exec.rs
index 68a9e18f43..3192ce7928 100644
--- a/src/exec.rs
+++ b/src/exec.rs
@@ -210,6 +210,9 @@ impl ExecBuilder {
         let mut prefixes = Some(Literals::empty());
         let mut suffixes = Some(Literals::empty());
         let mut bytes = false;
+        let is_set = self.options.pats.len() > 1;
+        // If we're compiling a regex set and that set has any anchored
+        // expressions, then disable all literal optimizations.
         for pat in &self.options.pats {
             let parser =
                 ExprBuilder::new()
@@ -227,6 +230,10 @@ impl ExecBuilder {
                 // Partial anchors unfortunately make it hard to use prefixes,
                 // so disable them.
                 prefixes = None;
+            } else if is_set && expr.is_anchored_start() {
+                // Regex sets with anchors do not go well with literal
+                // optimizations.
+                prefixes = None;
             }
             prefixes = prefixes.and_then(|mut prefixes| {
                 if !prefixes.union_prefixes(&expr) {
@@ -240,6 +247,10 @@ impl ExecBuilder {
                 // Partial anchors unfortunately make it hard to use suffixes,
                 // so disable them.
                 suffixes = None;
+            } else if is_set && expr.is_anchored_end() {
+                // Regex sets with anchors do not go well with literal
+                // optimizations.
+                suffixes = None;
             }
             suffixes = suffixes.and_then(|mut suffixes| {
                 if !suffixes.union_suffixes(&expr) {
diff --git a/tests/set.rs b/tests/set.rs
index 52b1b0dead..da3717e070 100644
--- a/tests/set.rs
+++ b/tests/set.rs
@@ -19,6 +19,7 @@ matset!(set17, &[".*a"], "a", 0);
 nomatset!(nset1, &["a", "a"], "b");
 nomatset!(nset2, &["^foo", "bar$"], "bar foo");
 nomatset!(nset3, { let xs: &[&str] = &[]; xs }, "a");
+nomatset!(nset4, &[r"^rooted$", r"\.log$"], "notrooted");
 
 // See: https://github.com/rust-lang/regex/issues/187
 #[test]