Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Rmacedo/update regex to mindfa #39

Merged
merged 22 commits into from
Jan 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ This library provides circom circuits that enables you to prove that
- the input string satisfies regular expressions (regexes) specified in the chip.
- the substrings are correctly extracted from the input string according to substring definitions.

This is a JS/Rust adaptation of the Python regex-to-circom work done by [sampriti](https://github.com/sampritipanda/) and [yush_g](https://twitter.com/yush_g) at https://www.zkregex.com
This is a JS/Rust adaptation of the Python regex-to-circom work done by [sampriti](https://github.com/sampritipanda/) and [yush_g](https://twitter.com/yush_g), along with [sorasue](https://github.com/SoraSuegami/)'s decomposed specifications. You can generate your own regexes via our no-code tool at https://www.zkregex.com

In addition to the original work, this library also supports the following features:
- CLI to dynamically generate regex circuit based on regex argument
Expand All @@ -22,6 +22,7 @@ You can define a regex to be proved and its substring patterns to be revealed.
Specifically, there are two ways to define them:
1. (manual way) converting the regex into an equivalent determistic finite automaton (DFA), selecting state transitions for each substring pattern, and writing the transitions in a json file.
2. (automatic way) writing a decomposed version of the regex in a json file with specifying which part of the regex is revealed.
3. (no code way) put the regex into zkregex.com > tool, highlight your chosen part, and copy the generated circuit
While the manual way supports more kinds of regexes than the automatic way, the latter is easier and sufficient for most regexes.

### Theory
Expand Down Expand Up @@ -55,7 +56,7 @@ For example, if you want to verify the regex of `email was meant for @(a|b|c|d|e
},
{
"is_public": true,
"regex_def": "(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z)+"
"regex_def": "[a-z]+"
},
{
"is_public": false,
Expand All @@ -72,7 +73,7 @@ You can generate its regex circom as follows.
#### `zk-regex raw -r <RAW_REGEX> -s <SUBSTRS_JSON_PATH> -c <CIRCOM_FILE_PATH> -t <TEMPLATE_NAME> -g <GEN_SUBSTRS (true/false)>`
This command generates a regex circom from a raw string of the regex definition and a json file that defines state transitions in DFA to be revealed.
For example, to verify the regex `1=(a|b) (2=(b|c)+ )+d` and reveal its alphabets,
1. Visualize DFA of the regex using [this website](https://mindfa.onrender.com/min_dfa).
1. Visualize DFA of the regex using [this website](https://zkregex.com).
2. Find state transitions matching with the substrings to be revealed. In this case, they are `2->3` for the alphabets after `1=`, `6->7` and `7->7` for those after `2=`, and `8->9` for `d`.
3. Make a json file at `./simple_regex_substrs.json` that defines the state transitions. For example,
```
Expand Down
6 changes: 3 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,10 @@
"packages/*"
],
"contributors": [
"Javier Su <[email protected]>",
"Kata Choi <[email protected]>",
"Sora Suegami <[email protected]>",
"Yush G <[email protected]>"
"Yush G <[email protected]>",
"Javier Su <[email protected]>",
"Kata Choi <[email protected]>"
],
"scripts": {
"install": "yarn workspaces -pt run install",
Expand Down
2 changes: 2 additions & 0 deletions packages/circom/circuits/common/email_addr_regex.circom
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ pragma circom 2.1.5;

include "@zk-email/zk-regex-circom/circuits/regex_helpers.circom";

// regex: (a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|0|1|2|3|4|5|6|7|8|9|!|#|$|%|&|'|\*|\+|-|/|=|\?|^|_|`|{|\||}|~|\.)+@(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|0|1|2|3|4|5|6|7|8|9|\.|-)+
template EmailAddrRegex(msg_bytes) {
signal input msg[msg_bytes];
signal output out;
Expand Down Expand Up @@ -248,6 +249,7 @@ template EmailAddrRegex(msg_bytes) {
is_consecutive[msg_bytes-1-i][0] <== states[num_bytes-i][3] * (1 - is_consecutive[msg_bytes-i][1]) + is_consecutive[msg_bytes-i][1];
is_consecutive[msg_bytes-1-i][1] <== state_changed[msg_bytes-i].out * is_consecutive[msg_bytes-1-i][0];
}
// substrings calculated: [{(1, 2), (1, 1), (0, 1), (3, 3), (2, 3)}]
signal is_substr0[msg_bytes][6];
signal is_reveal0[msg_bytes];
signal output reveal0[msg_bytes];
Expand Down
2 changes: 2 additions & 0 deletions packages/circom/circuits/common/email_domain_regex.circom
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ pragma circom 2.1.5;

include "@zk-email/zk-regex-circom/circuits/regex_helpers.circom";

// regex: (a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|0|1|2|3|4|5|6|7|8|9|!|#|$|%|&|'|\*|\+|-|/|=|\?|^|_|`|{|\||}|~|\.)+@(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|0|1|2|3|4|5|6|7|8|9|\.|-)+
template EmailDomainRegex(msg_bytes) {
signal input msg[msg_bytes];
signal output out;
Expand Down Expand Up @@ -248,6 +249,7 @@ template EmailDomainRegex(msg_bytes) {
is_consecutive[msg_bytes-1-i][0] <== states[num_bytes-i][3] * (1 - is_consecutive[msg_bytes-i][1]) + is_consecutive[msg_bytes-i][1];
is_consecutive[msg_bytes-1-i][1] <== state_changed[msg_bytes-i].out * is_consecutive[msg_bytes-1-i][0];
}
// substrings calculated: [{(3, 3), (2, 3)}]
signal is_substr0[msg_bytes][3];
signal is_reveal0[msg_bytes];
signal output reveal0[msg_bytes];
Expand Down
2 changes: 2 additions & 0 deletions packages/circom/circuits/common/message_id_regex.circom
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ pragma circom 2.1.5;

include "@zk-email/zk-regex-circom/circuits/regex_helpers.circom";

// regex: ((\n)|^)message-id:<(=|@|\.|\+|_|-|a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|0|1|2|3|4|5|6|7|8|9)+>\n
template MessageIdRegex(msg_bytes) {
signal input msg[msg_bytes];
signal output out;
Expand Down Expand Up @@ -270,6 +271,7 @@ template MessageIdRegex(msg_bytes) {
is_consecutive[msg_bytes-1-i][0] <== states[num_bytes-i][7] * (1 - is_consecutive[msg_bytes-i][1]) + is_consecutive[msg_bytes-i][1];
is_consecutive[msg_bytes-1-i][1] <== state_changed[msg_bytes-i].out * is_consecutive[msg_bytes-1-i][0];
}
// substrings calculated: [{(17, 18), (1, 4), (1, 1), (18, 1)}]
signal is_substr0[msg_bytes][5];
signal is_reveal0[msg_bytes];
signal output reveal0[msg_bytes];
Expand Down
288 changes: 288 additions & 0 deletions packages/circom/tests/circuits/simple_regex_decomposed.circom
Original file line number Diff line number Diff line change
@@ -0,0 +1,288 @@
pragma circom 2.1.5;

include "@zk-email/zk-regex-circom/circuits/regex_helpers.circom";

// regex: email was meant for @(a|b|c|d|e|f|g|h|i|j|k|l|m|n|o|p|q|r|s|t|u|v|w|x|y|z|A|B|C|D|E|F|G|H|I|J|K|L|M|N|O|P|Q|R|S|T|U|V|W|X|Y|Z|0|1|2|3|4|5|6|7|8|9|_)+
template SimpleRegexDecomposed(msg_bytes) {
signal input msg[msg_bytes];
signal output out;

var num_bytes = msg_bytes+1;
signal in[num_bytes];
in[0]<==255;
for (var i = 0; i < msg_bytes; i++) {
in[i+1] <== msg[i];
}

component eq[26][num_bytes];
component lt[4][num_bytes];
component and[26][num_bytes];
component multi_or[2][num_bytes];
signal states[num_bytes+1][24];
component state_changed[num_bytes];

states[0][0] <== 1;
for (var i = 1; i < 24; i++) {
states[0][i] <== 0;
}

for (var i = 0; i < num_bytes; i++) {
state_changed[i] = MultiOR(23);
lt[0][i] = LessEqThan(8);
lt[0][i].in[0] <== 65;
lt[0][i].in[1] <== in[i];
lt[1][i] = LessEqThan(8);
lt[1][i].in[0] <== in[i];
lt[1][i].in[1] <== 90;
and[0][i] = AND();
and[0][i].a <== lt[0][i].out;
and[0][i].b <== lt[1][i].out;
lt[2][i] = LessEqThan(8);
lt[2][i].in[0] <== 97;
lt[2][i].in[1] <== in[i];
lt[3][i] = LessEqThan(8);
lt[3][i].in[0] <== in[i];
lt[3][i].in[1] <== 122;
and[1][i] = AND();
and[1][i].a <== lt[2][i].out;
and[1][i].b <== lt[3][i].out;
eq[0][i] = IsEqual();
eq[0][i].in[0] <== in[i];
eq[0][i].in[1] <== 48;
eq[1][i] = IsEqual();
eq[1][i].in[0] <== in[i];
eq[1][i].in[1] <== 49;
eq[2][i] = IsEqual();
eq[2][i].in[0] <== in[i];
eq[2][i].in[1] <== 50;
eq[3][i] = IsEqual();
eq[3][i].in[0] <== in[i];
eq[3][i].in[1] <== 51;
eq[4][i] = IsEqual();
eq[4][i].in[0] <== in[i];
eq[4][i].in[1] <== 52;
eq[5][i] = IsEqual();
eq[5][i].in[0] <== in[i];
eq[5][i].in[1] <== 53;
eq[6][i] = IsEqual();
eq[6][i].in[0] <== in[i];
eq[6][i].in[1] <== 54;
eq[7][i] = IsEqual();
eq[7][i].in[0] <== in[i];
eq[7][i].in[1] <== 55;
eq[8][i] = IsEqual();
eq[8][i].in[0] <== in[i];
eq[8][i].in[1] <== 56;
eq[9][i] = IsEqual();
eq[9][i].in[0] <== in[i];
eq[9][i].in[1] <== 57;
eq[10][i] = IsEqual();
eq[10][i].in[0] <== in[i];
eq[10][i].in[1] <== 95;
and[2][i] = AND();
and[2][i].a <== states[i][1];
multi_or[0][i] = MultiOR(13);
multi_or[0][i].in[0] <== and[0][i].out;
multi_or[0][i].in[1] <== and[1][i].out;
multi_or[0][i].in[2] <== eq[0][i].out;
multi_or[0][i].in[3] <== eq[1][i].out;
multi_or[0][i].in[4] <== eq[2][i].out;
multi_or[0][i].in[5] <== eq[3][i].out;
multi_or[0][i].in[6] <== eq[4][i].out;
multi_or[0][i].in[7] <== eq[5][i].out;
multi_or[0][i].in[8] <== eq[6][i].out;
multi_or[0][i].in[9] <== eq[7][i].out;
multi_or[0][i].in[10] <== eq[8][i].out;
multi_or[0][i].in[11] <== eq[9][i].out;
multi_or[0][i].in[12] <== eq[10][i].out;
and[2][i].b <== multi_or[0][i].out;
and[3][i] = AND();
and[3][i].a <== states[i][23];
and[3][i].b <== multi_or[0][i].out;
multi_or[1][i] = MultiOR(2);
multi_or[1][i].in[0] <== and[2][i].out;
multi_or[1][i].in[1] <== and[3][i].out;
states[i+1][1] <== multi_or[1][i].out;
state_changed[i].in[0] <== states[i+1][1];
eq[11][i] = IsEqual();
eq[11][i].in[0] <== in[i];
eq[11][i].in[1] <== 101;
and[4][i] = AND();
and[4][i].a <== states[i][0];
and[4][i].b <== eq[11][i].out;
states[i+1][2] <== and[4][i].out;
state_changed[i].in[1] <== states[i+1][2];
eq[12][i] = IsEqual();
eq[12][i].in[0] <== in[i];
eq[12][i].in[1] <== 109;
and[5][i] = AND();
and[5][i].a <== states[i][2];
and[5][i].b <== eq[12][i].out;
states[i+1][3] <== and[5][i].out;
state_changed[i].in[2] <== states[i+1][3];
eq[13][i] = IsEqual();
eq[13][i].in[0] <== in[i];
eq[13][i].in[1] <== 46;
and[6][i] = AND();
and[6][i].a <== states[i][1];
and[6][i].b <== eq[13][i].out;
states[i+1][4] <== and[6][i].out;
state_changed[i].in[3] <== states[i+1][4];
eq[14][i] = IsEqual();
eq[14][i].in[0] <== in[i];
eq[14][i].in[1] <== 97;
and[7][i] = AND();
and[7][i].a <== states[i][3];
and[7][i].b <== eq[14][i].out;
states[i+1][5] <== and[7][i].out;
state_changed[i].in[4] <== states[i+1][5];
eq[15][i] = IsEqual();
eq[15][i].in[0] <== in[i];
eq[15][i].in[1] <== 105;
and[8][i] = AND();
and[8][i].a <== states[i][5];
and[8][i].b <== eq[15][i].out;
states[i+1][6] <== and[8][i].out;
state_changed[i].in[5] <== states[i+1][6];
eq[16][i] = IsEqual();
eq[16][i].in[0] <== in[i];
eq[16][i].in[1] <== 108;
and[9][i] = AND();
and[9][i].a <== states[i][6];
and[9][i].b <== eq[16][i].out;
states[i+1][7] <== and[9][i].out;
state_changed[i].in[6] <== states[i+1][7];
eq[17][i] = IsEqual();
eq[17][i].in[0] <== in[i];
eq[17][i].in[1] <== 32;
and[10][i] = AND();
and[10][i].a <== states[i][7];
and[10][i].b <== eq[17][i].out;
states[i+1][8] <== and[10][i].out;
state_changed[i].in[7] <== states[i+1][8];
eq[18][i] = IsEqual();
eq[18][i].in[0] <== in[i];
eq[18][i].in[1] <== 119;
and[11][i] = AND();
and[11][i].a <== states[i][8];
and[11][i].b <== eq[18][i].out;
states[i+1][9] <== and[11][i].out;
state_changed[i].in[8] <== states[i+1][9];
and[12][i] = AND();
and[12][i].a <== states[i][9];
and[12][i].b <== eq[14][i].out;
states[i+1][10] <== and[12][i].out;
state_changed[i].in[9] <== states[i+1][10];
eq[19][i] = IsEqual();
eq[19][i].in[0] <== in[i];
eq[19][i].in[1] <== 115;
and[13][i] = AND();
and[13][i].a <== states[i][10];
and[13][i].b <== eq[19][i].out;
states[i+1][11] <== and[13][i].out;
state_changed[i].in[10] <== states[i+1][11];
and[14][i] = AND();
and[14][i].a <== states[i][11];
and[14][i].b <== eq[17][i].out;
states[i+1][12] <== and[14][i].out;
state_changed[i].in[11] <== states[i+1][12];
and[15][i] = AND();
and[15][i].a <== states[i][12];
and[15][i].b <== eq[12][i].out;
states[i+1][13] <== and[15][i].out;
state_changed[i].in[12] <== states[i+1][13];
and[16][i] = AND();
and[16][i].a <== states[i][13];
and[16][i].b <== eq[11][i].out;
states[i+1][14] <== and[16][i].out;
state_changed[i].in[13] <== states[i+1][14];
and[17][i] = AND();
and[17][i].a <== states[i][14];
and[17][i].b <== eq[14][i].out;
states[i+1][15] <== and[17][i].out;
state_changed[i].in[14] <== states[i+1][15];
eq[20][i] = IsEqual();
eq[20][i].in[0] <== in[i];
eq[20][i].in[1] <== 110;
and[18][i] = AND();
and[18][i].a <== states[i][15];
and[18][i].b <== eq[20][i].out;
states[i+1][16] <== and[18][i].out;
state_changed[i].in[15] <== states[i+1][16];
eq[21][i] = IsEqual();
eq[21][i].in[0] <== in[i];
eq[21][i].in[1] <== 116;
and[19][i] = AND();
and[19][i].a <== states[i][16];
and[19][i].b <== eq[21][i].out;
states[i+1][17] <== and[19][i].out;
state_changed[i].in[16] <== states[i+1][17];
and[20][i] = AND();
and[20][i].a <== states[i][17];
and[20][i].b <== eq[17][i].out;
states[i+1][18] <== and[20][i].out;
state_changed[i].in[17] <== states[i+1][18];
eq[22][i] = IsEqual();
eq[22][i].in[0] <== in[i];
eq[22][i].in[1] <== 102;
and[21][i] = AND();
and[21][i].a <== states[i][18];
and[21][i].b <== eq[22][i].out;
states[i+1][19] <== and[21][i].out;
state_changed[i].in[18] <== states[i+1][19];
eq[23][i] = IsEqual();
eq[23][i].in[0] <== in[i];
eq[23][i].in[1] <== 111;
and[22][i] = AND();
and[22][i].a <== states[i][19];
and[22][i].b <== eq[23][i].out;
states[i+1][20] <== and[22][i].out;
state_changed[i].in[19] <== states[i+1][20];
eq[24][i] = IsEqual();
eq[24][i].in[0] <== in[i];
eq[24][i].in[1] <== 114;
and[23][i] = AND();
and[23][i].a <== states[i][20];
and[23][i].b <== eq[24][i].out;
states[i+1][21] <== and[23][i].out;
state_changed[i].in[20] <== states[i+1][21];
and[24][i] = AND();
and[24][i].a <== states[i][21];
and[24][i].b <== eq[17][i].out;
states[i+1][22] <== and[24][i].out;
state_changed[i].in[21] <== states[i+1][22];
eq[25][i] = IsEqual();
eq[25][i].in[0] <== in[i];
eq[25][i].in[1] <== 64;
and[25][i] = AND();
and[25][i].a <== states[i][22];
and[25][i].b <== eq[25][i].out;
states[i+1][23] <== and[25][i].out;
state_changed[i].in[22] <== states[i+1][23];
states[i+1][0] <== 1 - state_changed[i].out;
}

component final_state_result = MultiOR(num_bytes+1);
for (var i = 0; i <= num_bytes; i++) {
final_state_result.in[i] <== states[i][4];
}
out <== final_state_result.out;

signal is_consecutive[msg_bytes+1][2];
is_consecutive[msg_bytes][1] <== 1;
for (var i = 0; i < msg_bytes; i++) {
is_consecutive[msg_bytes-1-i][0] <== states[num_bytes-i][4] * (1 - is_consecutive[msg_bytes-i][1]) + is_consecutive[msg_bytes-i][1];
is_consecutive[msg_bytes-1-i][1] <== state_changed[msg_bytes-i].out * is_consecutive[msg_bytes-1-i][0];
}
// substrings calculated: [{(1, 1), (23, 1)}]
signal is_substr0[msg_bytes][3];
signal is_reveal0[msg_bytes];
signal output reveal0[msg_bytes];
for (var i = 0; i < msg_bytes; i++) {
is_substr0[i][0] <== 0;
is_substr0[i][1] <== is_substr0[i][0] + states[i+1][1] * states[i+2][1];
is_substr0[i][2] <== is_substr0[i][1] + states[i+1][23] * states[i+2][1];
is_reveal0[i] <== is_substr0[i][2] * is_consecutive[i][1];
reveal0[i] <== in[i+1] * is_reveal0[i];
}
}
Loading
Loading