Skip to content

Commit

Permalink
Fix a bug in unambiguous prefixes.
Browse files Browse the repository at this point in the history
Specifically, given the strings ABCX, CDAX and BCX, it was reporting
the unambiguous set as A, BCX and CDAX, which is wrong since A is a
substring of CDAX.

unambiguous_prefixes is now quite a bit of a mess, but so is the rest
of the literal extraction code. The only thing it has going for it is
a massive test suite.

Fixes rust-lang#289
  • Loading branch information
BurntSushi committed Oct 26, 2016
1 parent 3cfef1e commit 6300490
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 6 deletions.
21 changes: 15 additions & 6 deletions regex-syntax/src/literals.rs
Original file line number Diff line number Diff line change
Expand Up @@ -216,14 +216,17 @@ impl Literals {
if self.lits.is_empty() {
return self.to_empty();
}
let mut old: Vec<Lit> = self.lits.iter().cloned().collect();
let mut new = self.to_empty();
'OUTER:
for lit1 in &self.lits {
while let Some(mut candidate) = old.pop() {
if candidate.is_empty() {
continue;
}
if new.lits.is_empty() {
new.lits.push(lit1.clone());
new.lits.push(candidate);
continue;
}
let mut candidate = lit1.clone();
for lit2 in &mut new.lits {
if lit2.is_empty() {
continue;
Expand All @@ -236,11 +239,14 @@ impl Literals {
lit2.cut = candidate.cut;
continue 'OUTER;
}
if candidate.len() <= lit2.len() {
if candidate.len() < lit2.len() {
if let Some(i) = position(&candidate, &lit2) {
lit2.truncate(i);
lit2.cut();
candidate.cut();
let mut lit3 = lit2.clone();
lit3.truncate(i);
lit3.cut();
old.push(lit3);
lit2.clear();
}
} else {
if let Some(i) = position(&lit2, &candidate) {
Expand Down Expand Up @@ -1381,6 +1387,9 @@ mod tests {
test_unamb!(unambiguous11,
vec![M("zazb"), M("azb")], vec![C("azb"), C("z")]);
test_unamb!(unambiguous12, vec![M("foo"), C("foo")], vec![C("foo")]);
test_unamb!(unambiguous13,
vec![M("ABCX"), M("CDAX"), M("BCX")],
vec![C("A"), C("BCX"), C("CD")]);

// ************************************************************************
// Tests for suffix trimming.
Expand Down
3 changes: 3 additions & 0 deletions tests/regression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,3 +68,6 @@ mat!(ascii_boundary_capture, u!(r"(?-u)(\B)"), "\u{28f3e}", Some((0, 0)));
// See: https://github.com/rust-lang-nursery/regex/issues/280
ismatch!(partial_anchor_alternate_begin, u!(r"^a|z"), "yyyyya", false);
ismatch!(partial_anchor_alternate_end, u!(r"a$|z"), "ayyyyy", false);

// See: https://github.com/rust-lang-nursery/regex/issues/289
mat!(lits_unambiguous, u!(r"(ABC|CDA|BC)X"), "CDAX", Some((0, 4)));

0 comments on commit 6300490

Please sign in to comment.