Skip to content

Commit

Permalink
Implement soft keywords (hand-written and code generation) (python#129)
Browse files (browse the repository at this point in the history)
  • Loading branch information
lysnikolaou authored Jun 1, 2020
1 parent 6c50468 commit a10babb
Show file tree
Hide file tree
Showing 5 changed files with 58 additions and 10 deletions.
4 changes: 4 additions & 0 deletions Parser/pegen/vm.c
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,10 @@ run_vm(Parser *p, Rule rules[], int root)
oparg = f->rule->opcodes[f->iop++];
v = _PyPegen_expect_token(p, oparg);
break;
case OP_SOFT_KEYWORD:
oparg = f->rule->opcodes[f->iop++];
v = _PyPegen_expect_soft_keyword(p, soft_keywords[oparg]);
break;
case OP_RULE:
oparg = f->rule->opcodes[f->iop++];
Rule *rule = &rules[oparg];
Expand Down
2 changes: 2 additions & 0 deletions Parser/pegen/vm.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ typedef enum _opcodes {
OP_SUCCESS,
OP_FAILURE,
// The rest have an argument
OP_SOFT_KEYWORD,
OP_TOKEN,
OP_RULE,
OP_RETURN,
Expand All @@ -31,6 +32,7 @@ static char *opcode_names[] = {
"OP_SUCCESS",
"OP_FAILURE",
// The rest have an argument
"OP_SOFT_KEYWORD",
"OP_TOKEN",
"OP_RULE",
"OP_RETURN",
Expand Down
18 changes: 15 additions & 3 deletions Parser/pegen/vmparse.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,14 @@ static KeywordToken *reserved_keywords[] = {
},
};

/* Indices of the grammar's soft keywords.  Each SK_* constant is used as the
 * argument of an OP_SOFT_KEYWORD opcode and as the index into soft_keywords[]
 * below, so the two declarations must list their entries in the same order. */
enum {
SK___PEG_PARSER__,
};

/* Soft keyword spellings, indexed by the SK_* constants above.  The VM looks
 * up soft_keywords[oparg] and passes the string to
 * _PyPegen_expect_soft_keyword(). */
static const char *soft_keywords[] = {
"__peg_parser__",
};

enum {
R_START,
R_STMT,
Expand All @@ -33,6 +41,7 @@ enum {
A_FACTOR_1,
A_FACTOR_2,
A_FACTOR_3,
A_FACTOR_4,
A__GATHER_2_0,
A__GATHER_2_1,
};
Expand Down Expand Up @@ -78,12 +87,13 @@ static Rule all_rules[] = {
},
{"factor",
R_FACTOR,
{0, 8, 16, 19, -1},
{0, 8, 16, 19, 23, -1},
{
OP_TOKEN, 7, OP_RULE, R_EXPR, OP_TOKEN, 8, OP_RETURN, A_FACTOR_0,
OP_TOKEN, 9, OP_RULE, R__GATHER_2, OP_TOKEN, 10, OP_RETURN, A_FACTOR_1,
OP_NUMBER, OP_RETURN, A_FACTOR_2,
OP_NAME, OP_RETURN, A_FACTOR_3,
OP_SOFT_KEYWORD, SK___PEG_PARSER__, OP_RETURN, A_FACTOR_3,
OP_NAME, OP_RETURN, A_FACTOR_4,
},
},
{"root",
Expand Down Expand Up @@ -132,7 +142,7 @@ call_action(Parser *p, Frame *_f, int _iaction)
case A_EXPR_1:
case A_TERM_1:
case A_FACTOR_2:
case A_FACTOR_3:
case A_FACTOR_4:
case A__GATHER_2_0:
case A__GATHER_2_1:
return _f->vals[0];
Expand All @@ -146,6 +156,8 @@ call_action(Parser *p, Frame *_f, int _iaction)
return _f->vals[1];
case A_FACTOR_1:
return _Py_List ( _f->vals[1] , Load , EXTRA );
case A_FACTOR_3:
return RAISE_SYNTAX_ERROR("You found it!");
default:
assert(0);
}
Expand Down
1 change: 1 addition & 0 deletions Tools/peg_generator/data/simple.gram
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,5 @@ factor:
| '(' a=expr ')' { a }
| '[' a=','.expr+ ']' { _Py_List(a, Load, EXTRA) }
| NUMBER
| "__peg_parser__" { RAISE_SYNTAX_ERROR("You found it!") }
| NAME
43 changes: 36 additions & 7 deletions Tools/peg_generator/pegen/vm_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,17 +68,27 @@ def __init__(
self.gen = parser_generator
self.cache: Dict[Any, Any] = {}
self.keyword_cache: Dict[str, int] = {}
self.soft_keyword_cache: List[str] = []

def keyword_helper(self, keyword: str) -> int:
def keyword_helper(self, keyword: str) -> Tuple[str, int]:
if keyword not in self.keyword_cache:
self.keyword_cache[keyword] = self.gen.keyword_type()
return self.keyword_cache[keyword]
return "OP_TOKEN", self.keyword_cache[keyword]

def visit_StringLeaf(self, node: StringLeaf) -> int:
def soft_keyword_helper(self, keyword: str) -> Tuple[str, str]:
    """Register *keyword* as a soft keyword and return its opcode pair.

    The keyword is appended to ``self.soft_keyword_cache`` the first time it
    is seen, so the cache preserves first-use order without duplicates.

    Returns:
        A ``(opcode_name, argument)`` tuple: the literal ``"OP_SOFT_KEYWORD"``
        and the ``SK_<UPPERCASED KEYWORD>`` name used as the opcode argument.
    """
    registered = self.soft_keyword_cache
    already_known = keyword in registered
    if not already_known:
        registered.append(keyword)
    enum_name = "SK_" + keyword.upper()
    return ("OP_SOFT_KEYWORD", enum_name)

def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, Union[str, int]]:
val = ast.literal_eval(node.value)
if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword
return self.keyword_helper(val)
return token.EXACT_TOKEN_TYPES[val] # type: ignore [attr-defined]
if node.value.endswith("'"):
return self.keyword_helper(val)
else:
return self.soft_keyword_helper(val)
tok_num = token.EXACT_TOKEN_TYPES[val]
return "OP_TOKEN", token.tok_name[tok_num]

def visit_Repeat0(self, node: Repeat0) -> None:
if node in self.cache:
Expand Down Expand Up @@ -133,6 +143,7 @@ def generate(self, filename: str) -> None:
self.collect_todo()
self.gather_actions()
self._setup_keywords()
self._setup_soft_keywords()

self.print("enum {")
with self.indent():
Expand Down Expand Up @@ -194,6 +205,24 @@ def _setup_keywords(self) -> None:
self.print("};")
self.print()

def _setup_soft_keywords(self) -> None:
    """Emit the C declarations describing the grammar's soft keywords.

    Two declarations are printed:

    * an anonymous ``enum`` with one ``SK_<KEYWORD>`` constant per soft
      keyword (skipped when there are none, because C does not allow an
      empty enum), and
    * the ``soft_keywords`` string table, in the same order as the enum.

    The table is always emitted, because the VM references
    ``soft_keywords[oparg]`` unconditionally; omitting it for a grammar
    without soft keywords would leave the generated parser uncompilable.
    When the grammar has no soft keywords the table holds a single ``NULL``
    placeholder, since C forbids an empty initializer list.
    """
    soft_keywords = self.callmakervisitor.soft_keyword_cache

    if soft_keywords:
        self.print("enum {")
        with self.indent():
            for soft_keyword in soft_keywords:
                self.print(f"SK_{soft_keyword.upper()},")
        self.print("};")
        self.print()

    self.print("static const char *soft_keywords[] = {")
    with self.indent():
        for soft_keyword in soft_keywords:
            self.print(f'"{soft_keyword}",')
        if not soft_keywords:
            # Keep the symbol defined (and the initializer non-empty) so the
            # VM's reference to soft_keywords[] still compiles.
            self.print("NULL,  // placeholder: this grammar has no soft keywords")
    self.print("};")
    self.print()

def print_action_cases(self) -> None:
unique_actions: Dict[str, List[str]] = defaultdict(list)
for actionname, action in self.actions.items():
Expand Down Expand Up @@ -310,8 +339,8 @@ def visit_NameLeaf(self, node: NameLeaf) -> None:
self.add_opcode("OP_RULE", self._get_rule_opcode(name))

def visit_StringLeaf(self, node: StringLeaf) -> None:
token_type = self.callmakervisitor.visit(node)
self.add_opcode("OP_TOKEN", token_type)
op_pair = self.callmakervisitor.visit(node)
self.add_opcode(*op_pair)

def handle_loop_rhs(
self, node: Rhs, opcodes_by_alt: Dict[int, List[str]], collect_opcode: str,
Expand Down

0 comments on commit a10babb

Please sign in to comment.