Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix repeat of preferred empty match #137

Merged
merged 1 commit into from
May 28, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 29 additions & 9 deletions java/com/google/re2j/Compiler.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ class Compiler {
private static class Frag {
final int i; // an instruction address (pc).
int out; // a patch list; see explanation in Prog.java
boolean nullable; // whether the fragment can match the empty string

Frag() {
this(0, 0);
Expand All @@ -34,8 +35,13 @@ private static class Frag {
}

Frag(int i, int out) {
this(i, out, false);
}

Frag(int i, int out, boolean nullable) {
this.i = i;
this.out = out;
this.nullable = nullable;
}
}

Expand All @@ -56,7 +62,7 @@ static Prog compileRegexp(Regexp re) {
private Frag newInst(int op) {
// TODO(rsc): impose length limit.
prog.addInst(op);
return new Frag(prog.numInst() - 1);
return new Frag(prog.numInst() - 1, 0, true);
}

// Returns a no-op fragment. Sometimes unavoidable.
Expand Down Expand Up @@ -90,7 +96,7 @@ private Frag cat(Frag f1, Frag f2) {
}
// TODO(rsc): elide nop
prog.patch(f1.out, f2.i);
return new Frag(f1.i, f2.out);
return new Frag(f1.i, f2.out, f1.nullable && f2.nullable);
}

// Given fragments for a and b, returns fragment for a|b.
Expand All @@ -107,11 +113,16 @@ private Frag alt(Frag f1, Frag f2) {
i.out = f1.i;
i.arg = f2.i;
f.out = prog.append(f1.out, f2.out);
f.nullable = f1.nullable || f2.nullable;
return f;
}

// Given a fragment for a, returns a fragment for a? or a?? (if nongreedy)
private Frag quest(Frag f1, boolean nongreedy) {
// loop returns the fragment for the main loop of a plus or star.
// For plus, it can be used directly. with f1.i as the entry.
// For star, it can be used directly when f1 can't match an empty string.
// (When f1 can match an empty string, f1* must be implemented as (f1+)?
// to get the priority match order correct.)
private Frag loop(Frag f1, boolean nongreedy) {
Frag f = newInst(Inst.ALT);
Inst i = prog.getInst(f.i);
if (nongreedy) {
Expand All @@ -121,12 +132,12 @@ private Frag quest(Frag f1, boolean nongreedy) {
i.out = f1.i;
f.out = f.i << 1 | 1;
}
f.out = prog.append(f.out, f1.out);
prog.patch(f1.out, f.i);
return f;
}

// Given a fragment a, returns a fragment for a* or a*? (if nongreedy)
private Frag star(Frag f1, boolean nongreedy) {
// Given a fragment for a, returns a fragment for a? or a?? (if nongreedy)
private Frag quest(Frag f1, boolean nongreedy) {
Frag f = newInst(Inst.ALT);
Inst i = prog.getInst(f.i);
if (nongreedy) {
Expand All @@ -136,13 +147,21 @@ private Frag star(Frag f1, boolean nongreedy) {
i.out = f1.i;
f.out = f.i << 1 | 1;
}
prog.patch(f1.out, f.i);
f.out = prog.append(f.out, f1.out);
return f;
}

// Given a fragment a, returns a fragment for a* or a*? (if nongreedy)
private Frag star(Frag f1, boolean nongreedy) {
if (f1.nullable) {
return quest(plus(f1, nongreedy), nongreedy);
}
return loop(f1, nongreedy);
}

// Given a fragment for a, returns a fragment for a+ or a+? (if nongreedy)
private Frag plus(Frag f1, boolean nongreedy) {
return new Frag(f1.i, star(f1, nongreedy).out);
return new Frag(f1.i, loop(f1, nongreedy).out, f1.nullable);
}

// op is a bitmask of EMPTY_* flags.
Expand All @@ -160,6 +179,7 @@ private Frag rune(int rune, int flags) {
// flags : parser flags
private Frag rune(int[] runes, int flags) {
Frag f = newInst(Inst.RUNE);
f.nullable = false;
Inst i = prog.getInst(f.i);
i.runes = runes;
flags &= RE2.FOLD_CASE; // only relevant flag is FoldCase
Expand Down
10 changes: 9 additions & 1 deletion javatests/com/google/re2j/FindTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,7 @@ public String toString() {
35,
35,
36),
new Test("(|a)*", "aa", 3, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2),
};

@Parameters
Expand Down Expand Up @@ -538,7 +539,14 @@ private void testSubmatch(String testName, Test test, int n, String[] result) {
System.err.println(testName + " " + test + " " + n + " " + k + " ");
String expect = test.submatchString(n, k / 2);
if (!expect.equals(result[k / 2])) {
fail(String.format("%s %d: expected %s got %s: %s", testName, n, expect, result, test));
fail(
String.format(
"%s %d: expected %s got %s: %s",
testName,
n,
expect,
Arrays.toString(result),
test));
}
}
}
Expand Down
19 changes: 19 additions & 0 deletions javatests/com/google/re2j/ProgTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,25 @@ public class ProgTest {
"(?:(?:^).)",
"0 fail\n" + "1* empty 4 -> 2\n" + "2 anynotnl -> 3\n" + "3 match\n"
},
{
"(?:|a)+",
"0 fail\n"
+ "1 nop -> 4\n"
+ "2 rune1 \"a\" -> 4\n"
+ "3* alt -> 1, 2\n"
+ "4 alt -> 3, 5\n"
+ "5 match\n"
},
{
"(?:|a)*",
"0 fail\n"
+ "1 nop -> 4\n"
+ "2 rune1 \"a\" -> 4\n"
+ "3 alt -> 1, 2\n"
+ "4 alt -> 3, 6\n"
+ "5* alt -> 3, 6\n"
+ "6 match\n"
},
};

private final String input;
Expand Down
12 changes: 4 additions & 8 deletions testdata/basic.dat
Original file line number Diff line number Diff line change
Expand Up @@ -124,24 +124,20 @@ E ((a)) abc (0,1)(0,1)(0,1)
E (a)b(c) abc (0,3)(0,1)(2,3)
E a+b+c aabbabc (4,7)
E a* aaa (0,3)
#E (a*)* - (0,0)(0,0)
E (a*)* - (0,0)(?,?) RE2/Go
E (a*)* - (0,0)(0,0)
E (a*)+ - (0,0)(0,0)
#E (a*|b)* - (0,0)(0,0)
E (a*|b)* - (0,0)(?,?) RE2/Go
E (a*|b)* - (0,0)(0,0)
E (a+|b)* ab (0,2)(1,2)
E (a+|b)+ ab (0,2)(1,2)
E (a+|b)? ab (0,1)(0,1)
BE [^ab]* cde (0,3)
#E (^)* - (0,0)(0,0)
E (^)* - (0,0)(?,?) RE2/Go
E (^)* - (0,0)(0,0)
BE a* NULL (0,0)
E ([abc])*d abbbcd (0,6)(4,5)
E ([abc])*bcd abcd (0,4)(0,1)
E a|b|c|d|e e (0,1)
E (a|b|c|d|e)f ef (0,2)(0,1)
#E ((a*|b))* - (0,0)(0,0)(0,0)
E ((a*|b))* - (0,0)(?,?)(?,?) RE2/Go
E ((a*|b))* - (0,0)(0,0)(0,0)
BE abcd*efg abcdefg (0,7)
BE ab* xabyabbbz (1,3)
BE ab* xayabbbz (1,2)
Expand Down
18 changes: 6 additions & 12 deletions testdata/nullsubexpr.dat
Original file line number Diff line number Diff line change
@@ -1,8 +1,7 @@
NOTE null subexpression matches : 2002-06-06

E (a*)* a (0,1)(0,1)
#E SAME x (0,0)(0,0)
E SAME x (0,0)(?,?) RE2/Go
E SAME x (0,0)(0,0)
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E (a*)+ a (0,1)(0,1)
Expand All @@ -19,17 +18,15 @@ E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)

E ([a]*)* a (0,1)(0,1)
#E SAME x (0,0)(0,0)
E SAME x (0,0)(?,?) RE2/Go
E SAME x (0,0)(0,0)
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E ([a]*)+ a (0,1)(0,1)
E SAME x (0,0)(0,0)
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaax (0,6)(0,6)
E ([^b]*)* a (0,1)(0,1)
#E SAME b (0,0)(0,0)
E SAME b (0,0)(?,?) RE2/Go
E SAME b (0,0)(0,0)
E SAME aaaaaa (0,6)(0,6)
E SAME aaaaaab (0,6)(0,6)
E ([ab]*)* a (0,1)(0,1)
Expand All @@ -41,11 +38,9 @@ E SAME bbbbbb (0,6)(0,6)
E SAME aaaabcde (0,5)(0,5)
E ([^a]*)* b (0,1)(0,1)
E SAME bbbbbb (0,6)(0,6)
#E SAME aaaaaa (0,0)(0,0)
E SAME aaaaaa (0,0)(?,?) RE2/Go
E SAME aaaaaa (0,0)(0,0)
E ([^ab]*)* ccccxx (0,6)(0,6)
#E SAME ababab (0,0)(0,0)
E SAME ababab (0,0)(?,?) RE2/Go
E SAME ababab (0,0)(0,0)

E ((z)+|a)* zabcde (0,2)(1,2)

Expand All @@ -65,8 +60,7 @@ B \(a*\)*\(x\)\(\1\) axa (0,3)(0,1)(1,2)(2,3)
B \(a*\)*\(x\)\(\1\)\(x\) axax (0,4)(0,1)(1,2)(2,3)(3,4)
B \(a*\)*\(x\)\(\1\)\(x\) axxa (0,3)(1,1)(1,2)(2,2)(2,3)

#E (a*)*(x) x (0,1)(0,0)(0,1)
E (a*)*(x) x (0,1)(?,?)(0,1) RE2/Go
E (a*)*(x) x (0,1)(0,0)(0,1)
E (a*)*(x) ax (0,2)(0,1)(1,2)
E (a*)*(x) axa (0,2)(0,1)(1,2)

Expand Down
Binary file modified testdata/re2-exhaustive.txt.gz
Binary file not shown.
Loading