From 9a845210796508cacdef4fa70e19ee059105f2f5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=90=B4=E7=83=9C?= Date: Thu, 29 Jul 2021 10:03:01 -0700 Subject: [PATCH 1/3] =?UTF-8?q?#25=20=E6=8B=86=E5=87=BA=E8=AF=8D=E6=B3=95?= =?UTF-8?q?=E9=83=A8=E5=88=86,=20=E9=80=9A=E8=BF=87=E5=9F=BA=E6=9C=ACunitt?= =?UTF-8?q?est=E6=B5=8B=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/cantonese.py | 347 +----------------------------- "src/\350\257\215\346\263\225.py" | 347 ++++++++++++++++++++++++++++++ 2 files changed, 348 insertions(+), 346 deletions(-) create mode 100644 "src/\350\257\215\346\263\225.py" diff --git a/src/cantonese.py b/src/cantonese.py index e46b79e..49dc014 100644 --- a/src/cantonese.py +++ b/src/cantonese.py @@ -9,354 +9,9 @@ import os import argparse from 濑嘢 import 濑啲咩嘢 +from 词法 import * from stack_vm import * -""" - Get the Cantonese Token List -""" - -kw_print = "畀我睇下" -kw_endprint = "点样先" -kw_exit = "收工" -kw_in = "喺" -kw_elif = "定系" -kw_turtle_beg = "老作一下" -kw_type = "起底" -kw_assign = "讲嘢" -kw_class_def = "咩系" -kw_else_or_not = "唔系" -kw_is = "系" -kw_if = "如果" -kw_then = "嘅话" -kw_do = "->" -kw_begin = "{" -kw_end = "}" -kw_pass = "咩都唔做" -kw_while_do = "落操场玩跑步" -kw_function = "$" -kw_call = "用下" -kw_import = "使下" -kw_func_begin = "要做咩" -kw_func_end = "搞掂" -kw_is_2 = "就" -kw_assert = "谂下" -kw_class_assign = "佢嘅" -kw_while = "玩到" -kw_whi_end = "为止" -kw_return = "还数" -kw_try = "执嘢" -kw_except = "揾到" -kw_finally = "执手尾" -kw_raise = "掟个" -kw_raise_end = "来睇下" -kw_from = "从" -kw_to = "行到" -kw_endfor = "行晒" -kw_extend = "佢个老豆叫" -kw_method = "佢识得" -kw_endclass = "明白未啊" -kw_cmd = "落Order" -kw_break = "饮茶先啦" -kw_lst_assign = "拍住上" -kw_is_3 = "係" -kw_exit_1 = "辛苦晒啦" -kw_exit_2 = "同我躝" -kw_false = "唔啱" -kw_true = "啱" -kw_none = "冇" -kw_stackinit = "有条仆街叫" -kw_push = "顶你" -kw_pop = "丢你" -kw_model = "嗌" -kw_mod_new = "过嚟估下" -kw_class_init = "佢有啲咩" -kw_self = "自己嘅" -kw_call_begin = "下" -kw_get_value = "@" - 
-keywords = ( - kw_print, - kw_endprint, - kw_exit, - kw_in, - kw_elif, - kw_turtle_beg, - kw_type, - kw_assign, - kw_class_def, - kw_else_or_not, - kw_is, - kw_if, - kw_then, - kw_do, - kw_begin, - kw_end, - kw_pass, - kw_while_do, - kw_function, - kw_call, - kw_import, - kw_func_begin, - kw_func_end, - kw_is_2, - kw_assert, - kw_class_assign, - kw_while, - kw_whi_end, - kw_return, - kw_try, - kw_except, - kw_finally, - kw_raise, - kw_raise_end, - kw_from, - kw_to, - kw_endfor, - kw_extend, - kw_method, - kw_endclass, - kw_cmd, - kw_break, - kw_lst_assign, - kw_is_3, - kw_exit_1, - kw_exit_2, - kw_false, - kw_true, - kw_none, - kw_stackinit, - kw_push, - kw_pop, - kw_model, - kw_mod_new, - kw_class_init, - kw_self, - kw_call_begin, - kw_get_value -) - -class lexer(object): - def __init__(self, code, keywords): - self.code = code - self.keywords = keywords - self.line = 1 - self.re_new_line = re.compile(r"\r\n|\n\r|\n|\r") - self.re_number = r"^0[xX][0-9a-fA-F]*(\.[0-9a-fA-F]*)?([pP][+\-]?[0-9]+)?|^[0-9]*(\.[0-9]*)?([eE][+\-]?[0-9]+)?" 
- self.re_id = r"^[_\d\w]+|^[\u4e00-\u9fa5]+" - self.re_str = r"(?s)(^'(\\\\|\\'|\\\n|\\z\s*|[^'\n])*')|(^\"(\\\\|\\\"|\\\n|\\z\s*|[^\"\n])*\")" - self.re_expr = r"[|](.*?)[|]" - self.re_callfunc = r"[&](.*?)[)]" - self.op = r'(?P(相加){1}|(加){1}|(减){1}|(乘){1}|(整除){1}|(除){1}|(余){1}|(异或){1}|(取反){1}|(左移){1}|(右移){1}'\ - r'(与){1}(或者){1}|(或){1}|(系){1})|(同埋){1}|(自己嘅){1}|(比唔上){1}|(喺){1}' - self.op_get_code = re.findall(re.compile(r'[(](.*?)[)]', re.S), self.op[5 : ]) - self.op_gen_code = ["矩阵.matrix_addition", "+", "-", "*", "//", "/", "%", "^", "~", "<<", ">>", - "&", "or", "|", "==", "and", "self.", '<', 'in'] - self.build_in_funcs = r'(?P(瞓){1}|(加啲){1}|(摞走){1}|(嘅长度){1}|(阵先){1}|' \ - r'(畀你){1}|(散水){1})' - self.bif_get_code = re.findall(re.compile(r'[(](.*?)[)]', re.S), self.build_in_funcs[19 :]) - self.bif_gen_code = ["sleep", "append", "remove", ".__len__()", "2", "input", "clear"] - - def make_rep(self, list1 : list, list2 : list) -> list: - assert len(list1) == len(list2) - ret = [] - for i in range(len(list1)): - ret.append([list1[i], list2[i]]) - return ret - - def trans(self, code : str, rep : str) -> str: - p = re.match(r'\|(.*)同(.*)有几衬\|', code, re.M|re.I) - if p: - code = " corr(" + p.group(1) +", " + p.group(2) + ") " - for r in rep: - code = code.replace(r[0], r[1]) - return code - - def next(self, n): - self.code = self.code[n:] - - def check(self, s): - return self.code.startswith(s) - - @staticmethod - def is_white_space(c): - return c in ('\t', '\n', '\v', '\f', '\r', ' ') - - @staticmethod - def is_new_line(c): - return c in ('\r', '\n') - - @staticmethod - def isChinese(word): - for ch in word: - if '\u4e00' <= ch <= '\u9fff': - return True - return False - - - def skip_space(self): - while len(self.code) > 0: - if self.check('\r\n') or self.check('\n\r'): - self.next(2) - self.line += 1 - elif self.is_new_line(self.code[0]): - self.next(1) - self.line += 1 - elif self.check('?') or self.check(':') or self.check(':') or self.check('?'): - self.next(1) - 
elif self.is_white_space(self.code[0]): - self.next(1) - else: - break - - def scan(self, pattern): - m = re.match(pattern, self.code) - if m: - token = m.group() - self.next(len(token)) - return token - - def scan_identifier(self): - return self.scan(self.re_id) - - def scan_expr(self): - return self.scan(self.re_expr) - - def scan_number(self): - return self.scan(self.re_number) - - def scan_callfunc(self): - return self.scan(self.re_callfunc) - - def scan_short_string(self): - m = re.match(self.re_str, self.code) - if m: - s = m.group() - self.next(len(s)) - return s - self.error('unfinished string') - return '' - - def error(self, f, *args): - err = f.format(*args) - err = '{0}: {1}'.format(self.line, err) - raise Exception(err) - - def get_token(self): - self.skip_space() - if len(self.code) == 0: - return [self.line, ['EOF', 'EOF']] - - c = self.code[0] - - if c == '&': - token = self.scan_callfunc() + ')' - token = self.trans(token, self.make_rep(self.bif_get_code, self.bif_gen_code)) - return [self.line, ['expr', token]] - - if c == '|': - token = self.scan_expr() - token = self.trans(token, self.make_rep(self.bif_get_code, self.bif_gen_code)) - token = self.trans(token, self.make_rep(self.op_get_code, self.op_gen_code)) - return [self.line, ['expr', token]] - - if c == '-': - if self.check('->'): - self.next(2) - return [self.line, ['keyword', '->']] - - if c == '$': - self.next(1) - return [self.line, ['keyword', '$']] - - if c == '@': - self.next(1) - return [self.line, ['keyword', '@']] - - if c == '{': - self.next(1) - return [self.line, ['keyword', '{']] - - if c == '}': - self.next(1) - return [self.line, ['keyword', '}']] - - if self.isChinese(c) or c == '_' or c.isalpha(): - token = self.scan_identifier() - if token in self.keywords: - return [self.line, ['keywords', token]] - return [self.line, ['identifier', token]] - - if c in ('\'', '"'): - return [self.line, ['string', self.scan_short_string()]] - - if c == '.' 
or c.isdigit(): - token = self.scan_number() - return [self.line, ['num', token]] - - self.error("睇唔明嘅Token: " + c) - - def escape(self, s): - ret = '' - while len(s) > 0: - if s[0] != '\\': - ret += s[0] - s = s[1:] - continue - - if len(s) == 1: - self.error('unfinished string') - - if s[1] == 'a': - ret += '\a' - s = s[2:] - continue - elif s[1] == 'b': - ret += '\b' - s = s[2:] - continue - elif s[1] == 'f': - ret += '\f' - s = s[2:] - continue - elif s[1] == 'n' or s[1] == '\n': - ret += '\n' - s = s[2:] - continue - elif s[1] == 'r': - ret += '\r' - s = s[2:] - continue - elif s[1] == 't': - ret += '\t' - s = s[2:] - continue - elif s[1] == 'v': - ret += '\v' - s = s[2:] - continue - elif s[1] == '"': - ret += '"' - s = s[2:] - continue - elif s[1] == '\'': - ret += '\'' - s = s[2:] - continue - elif s[1] == '\\': - ret += '\\' - s = s[2:] - continue - -def cantonese_token(code : str, keywords : str) -> list: - lex = lexer(code, keywords) - tokens = [] - while True: - token = lex.get_token() - tokens.append(token) - if token[1] == ['EOF', 'EOF']: - break - return tokens - """ AST node for the Token List """ diff --git "a/src/\350\257\215\346\263\225.py" "b/src/\350\257\215\346\263\225.py" new file mode 100644 index 0000000..f5c618d --- /dev/null +++ "b/src/\350\257\215\346\263\225.py" @@ -0,0 +1,347 @@ +import re + +""" + Get the Cantonese Token List +""" + +kw_print = "畀我睇下" +kw_endprint = "点样先" +kw_exit = "收工" +kw_in = "喺" +kw_elif = "定系" +kw_turtle_beg = "老作一下" +kw_type = "起底" +kw_assign = "讲嘢" +kw_class_def = "咩系" +kw_else_or_not = "唔系" +kw_is = "系" +kw_if = "如果" +kw_then = "嘅话" +kw_do = "->" +kw_begin = "{" +kw_end = "}" +kw_pass = "咩都唔做" +kw_while_do = "落操场玩跑步" +kw_function = "$" +kw_call = "用下" +kw_import = "使下" +kw_func_begin = "要做咩" +kw_func_end = "搞掂" +kw_is_2 = "就" +kw_assert = "谂下" +kw_class_assign = "佢嘅" +kw_while = "玩到" +kw_whi_end = "为止" +kw_return = "还数" +kw_try = "执嘢" +kw_except = "揾到" +kw_finally = "执手尾" +kw_raise = "掟个" +kw_raise_end = 
"来睇下" +kw_from = "从" +kw_to = "行到" +kw_endfor = "行晒" +kw_extend = "佢个老豆叫" +kw_method = "佢识得" +kw_endclass = "明白未啊" +kw_cmd = "落Order" +kw_break = "饮茶先啦" +kw_lst_assign = "拍住上" +kw_is_3 = "係" +kw_exit_1 = "辛苦晒啦" +kw_exit_2 = "同我躝" +kw_false = "唔啱" +kw_true = "啱" +kw_none = "冇" +kw_stackinit = "有条仆街叫" +kw_push = "顶你" +kw_pop = "丢你" +kw_model = "嗌" +kw_mod_new = "过嚟估下" +kw_class_init = "佢有啲咩" +kw_self = "自己嘅" +kw_call_begin = "下" +kw_get_value = "@" + +keywords = ( + kw_print, + kw_endprint, + kw_exit, + kw_in, + kw_elif, + kw_turtle_beg, + kw_type, + kw_assign, + kw_class_def, + kw_else_or_not, + kw_is, + kw_if, + kw_then, + kw_do, + kw_begin, + kw_end, + kw_pass, + kw_while_do, + kw_function, + kw_call, + kw_import, + kw_func_begin, + kw_func_end, + kw_is_2, + kw_assert, + kw_class_assign, + kw_while, + kw_whi_end, + kw_return, + kw_try, + kw_except, + kw_finally, + kw_raise, + kw_raise_end, + kw_from, + kw_to, + kw_endfor, + kw_extend, + kw_method, + kw_endclass, + kw_cmd, + kw_break, + kw_lst_assign, + kw_is_3, + kw_exit_1, + kw_exit_2, + kw_false, + kw_true, + kw_none, + kw_stackinit, + kw_push, + kw_pop, + kw_model, + kw_mod_new, + kw_class_init, + kw_self, + kw_call_begin, + kw_get_value +) + +class lexer(object): + def __init__(self, code, keywords): + self.code = code + self.keywords = keywords + self.line = 1 + self.re_new_line = re.compile(r"\r\n|\n\r|\n|\r") + self.re_number = r"^0[xX][0-9a-fA-F]*(\.[0-9a-fA-F]*)?([pP][+\-]?[0-9]+)?|^[0-9]*(\.[0-9]*)?([eE][+\-]?[0-9]+)?" 
+ self.re_id = r"^[_\d\w]+|^[\u4e00-\u9fa5]+" + self.re_str = r"(?s)(^'(\\\\|\\'|\\\n|\\z\s*|[^'\n])*')|(^\"(\\\\|\\\"|\\\n|\\z\s*|[^\"\n])*\")" + self.re_expr = r"[|](.*?)[|]" + self.re_callfunc = r"[&](.*?)[)]" + self.op = r'(?P(相加){1}|(加){1}|(减){1}|(乘){1}|(整除){1}|(除){1}|(余){1}|(异或){1}|(取反){1}|(左移){1}|(右移){1}'\ + r'(与){1}(或者){1}|(或){1}|(系){1})|(同埋){1}|(自己嘅){1}|(比唔上){1}|(喺){1}' + self.op_get_code = re.findall(re.compile(r'[(](.*?)[)]', re.S), self.op[5 : ]) + self.op_gen_code = ["矩阵.matrix_addition", "+", "-", "*", "//", "/", "%", "^", "~", "<<", ">>", + "&", "or", "|", "==", "and", "self.", '<', 'in'] + self.build_in_funcs = r'(?P(瞓){1}|(加啲){1}|(摞走){1}|(嘅长度){1}|(阵先){1}|' \ + r'(畀你){1}|(散水){1})' + self.bif_get_code = re.findall(re.compile(r'[(](.*?)[)]', re.S), self.build_in_funcs[19 :]) + self.bif_gen_code = ["sleep", "append", "remove", ".__len__()", "2", "input", "clear"] + + def make_rep(self, list1 : list, list2 : list) -> list: + assert len(list1) == len(list2) + ret = [] + for i in range(len(list1)): + ret.append([list1[i], list2[i]]) + return ret + + def trans(self, code : str, rep : str) -> str: + p = re.match(r'\|(.*)同(.*)有几衬\|', code, re.M|re.I) + if p: + code = " corr(" + p.group(1) +", " + p.group(2) + ") " + for r in rep: + code = code.replace(r[0], r[1]) + return code + + def next(self, n): + self.code = self.code[n:] + + def check(self, s): + return self.code.startswith(s) + + @staticmethod + def is_white_space(c): + return c in ('\t', '\n', '\v', '\f', '\r', ' ') + + @staticmethod + def is_new_line(c): + return c in ('\r', '\n') + + @staticmethod + def isChinese(word): + for ch in word: + if '\u4e00' <= ch <= '\u9fff': + return True + return False + + + def skip_space(self): + while len(self.code) > 0: + if self.check('\r\n') or self.check('\n\r'): + self.next(2) + self.line += 1 + elif self.is_new_line(self.code[0]): + self.next(1) + self.line += 1 + elif self.check('?') or self.check(':') or self.check(':') or self.check('?'): + self.next(1) + 
elif self.is_white_space(self.code[0]): + self.next(1) + else: + break + + def scan(self, pattern): + m = re.match(pattern, self.code) + if m: + token = m.group() + self.next(len(token)) + return token + + def scan_identifier(self): + return self.scan(self.re_id) + + def scan_expr(self): + return self.scan(self.re_expr) + + def scan_number(self): + return self.scan(self.re_number) + + def scan_callfunc(self): + return self.scan(self.re_callfunc) + + def scan_short_string(self): + m = re.match(self.re_str, self.code) + if m: + s = m.group() + self.next(len(s)) + return s + self.error('unfinished string') + return '' + + def error(self, f, *args): + err = f.format(*args) + err = '{0}: {1}'.format(self.line, err) + raise Exception(err) + + def get_token(self): + self.skip_space() + if len(self.code) == 0: + return [self.line, ['EOF', 'EOF']] + + c = self.code[0] + + if c == '&': + token = self.scan_callfunc() + ')' + token = self.trans(token, self.make_rep(self.bif_get_code, self.bif_gen_code)) + return [self.line, ['expr', token]] + + if c == '|': + token = self.scan_expr() + token = self.trans(token, self.make_rep(self.bif_get_code, self.bif_gen_code)) + token = self.trans(token, self.make_rep(self.op_get_code, self.op_gen_code)) + return [self.line, ['expr', token]] + + if c == '-': + if self.check('->'): + self.next(2) + return [self.line, ['keyword', '->']] + + if c == '$': + self.next(1) + return [self.line, ['keyword', '$']] + + if c == '@': + self.next(1) + return [self.line, ['keyword', '@']] + + if c == '{': + self.next(1) + return [self.line, ['keyword', '{']] + + if c == '}': + self.next(1) + return [self.line, ['keyword', '}']] + + if self.isChinese(c) or c == '_' or c.isalpha(): + token = self.scan_identifier() + if token in self.keywords: + return [self.line, ['keywords', token]] + return [self.line, ['identifier', token]] + + if c in ('\'', '"'): + return [self.line, ['string', self.scan_short_string()]] + + if c == '.' 
or c.isdigit(): + token = self.scan_number() + return [self.line, ['num', token]] + + self.error("睇唔明嘅Token: " + c) + + def escape(self, s): + ret = '' + while len(s) > 0: + if s[0] != '\\': + ret += s[0] + s = s[1:] + continue + + if len(s) == 1: + self.error('unfinished string') + + if s[1] == 'a': + ret += '\a' + s = s[2:] + continue + elif s[1] == 'b': + ret += '\b' + s = s[2:] + continue + elif s[1] == 'f': + ret += '\f' + s = s[2:] + continue + elif s[1] == 'n' or s[1] == '\n': + ret += '\n' + s = s[2:] + continue + elif s[1] == 'r': + ret += '\r' + s = s[2:] + continue + elif s[1] == 't': + ret += '\t' + s = s[2:] + continue + elif s[1] == 'v': + ret += '\v' + s = s[2:] + continue + elif s[1] == '"': + ret += '"' + s = s[2:] + continue + elif s[1] == '\'': + ret += '\'' + s = s[2:] + continue + elif s[1] == '\\': + ret += '\\' + s = s[2:] + continue + +def cantonese_token(code : str, keywords : str) -> list: + lex = lexer(code, keywords) + tokens = [] + while True: + token = lex.get_token() + tokens.append(token) + if token[1] == ['EOF', 'EOF']: + break + return tokens From 1d0ce5e89a793e469dcbd33765b81ffe04baa2f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=90=B4=E7=83=9C?= Date: Thu, 29 Jul 2021 10:04:50 -0700 Subject: [PATCH 2/3] =?UTF-8?q?#25=20=E6=8B=86=E5=87=BA=E8=AF=AD=E6=B3=95?= =?UTF-8?q?=E6=A0=91=E8=8A=82=E7=82=B9=E9=83=A8=E5=88=86?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/cantonese.py | 211 +----------------- "src/\350\257\255\346\263\225\346\240\221.py" | 209 +++++++++++++++++ 2 files changed, 210 insertions(+), 210 deletions(-) create mode 100644 "src/\350\257\255\346\263\225\346\240\221.py" diff --git a/src/cantonese.py b/src/cantonese.py index 49dc014..608a7aa 100644 --- a/src/cantonese.py +++ b/src/cantonese.py @@ -10,218 +10,9 @@ import argparse from 濑嘢 import 濑啲咩嘢 from 词法 import * +from 语法树 import * from stack_vm import * -""" - AST node for the Token List -""" -def 
node_print_new(Node : list, arg) -> None: - """ - Node_print - | - arg - """ - Node.append(["node_print", arg]) - -def node_sleep_new(Node : list, arg) -> None: - """ - Node_sleep - | - arg - """ - Node.append(["node_sleep", arg]) - -def node_break_new(Node : list) -> None: - Node.append(["node_break"]) - -def node_exit_new(Node : list) -> None: - """ - Node_exit - | - arg - """ - Node.append(["node_exit"]) - -def node_let_new(Node : list, key ,value) -> None: - """ - Node_let - / \ - key value - """ - Node.append(["node_let", key, value]) - -def node_if_new(Node : list, cond, stmt) -> None: - """ - Node_if - / \ - cond stmt - """ - Node.append(["node_if", cond, stmt]) - -def node_elif_new(Node : list, cond, stmt) -> None: - """ - Node_elif - / \ - cond stmt - """ - Node.append(["node_elif", cond, stmt]) - -def node_else_new(Node : list, stmt) -> None: - """ - Node_else - | - stmt - """ - Node.append(["node_else", stmt]) - -def node_loop_new(Node : list, cond, stmt) -> None: - """ - Node_loop - / \ - cond stmt - """ - Node.append(["node_loop", cond, stmt]) - -def node_func_new(Node : list, func_name, args, body) -> None: - """ - Node_fundef - / | \ - name args body - """ - Node.append(["node_fundef", func_name, args, body]) - -def node_call_new(Node : list, func_name) -> None: - """ - Node_call - | - name - """ - Node.append(["node_call", func_name]) - -def node_build_in_func_call_new(Node : list, var, func_name, args) -> None: - """ - Node_bcall - / \ - name args - """ - Node.append(["node_bcall", var, func_name, args]) - -def node_import_new(Node : list, name) -> None: - """ - Node_import - | - name - """ - Node.append(["node_import", name]) - -def node_return_new(Node : list, v) -> None: - """ - Node_return - | - value - """ - Node.append(["node_return", v]) - -def node_try_new(Node : list, try_part) -> None: - """ - Node_try - | - stmt - """ - Node.append(["node_try", try_part]) - -def node_except_new(Node : list, _except, except_part) -> None: - """ - 
Node_except - / \ - exception stmt - """ - Node.append(["node_except", _except, except_part]) - -def node_finally_new(Node : list, finally_part) -> None: - """ - Node_finally - | - stmt - """ - Node.append(["node_finally", finally_part]) - -def node_raise_new(Node : list, execption) -> None: - """ - Node_raise - | - exception - """ - Node.append(["node_raise", execption]) - -def node_for_new(Node : list, iterating_var, sequence, stmt_part) -> None: - """ - Node_for - / | \ - iter seq stmt - """ - Node.append(["node_for", iterating_var, sequence, stmt_part]) - -def node_turtle_new(Node : list, instruction) -> None: - Node.append(["node_turtle", instruction]) - -def node_assert_new(Node : list, args) -> None: - Node.append(["node_assert", args]) - -def node_model_new(Node : list, model, datatest) -> None: - """ - Node_model - / \ - model dataset - """ - Node.append(["node_model", model, datatest]) - -def node_gettype_new(Node : list, value) -> None: - Node.append(["node_gettype", value]) - -def node_class_new(Node : list, name, extend, method) -> None: - """ - Node_class - / | \ - name extend method - """ - Node.append(["node_class", name, extend, method]) - -def node_attribute_new(Node : list, attr_list) -> None: - Node.append(["node_attr", attr_list]) - -def node_method_new(Node : list, name, args, stmt) -> None: - """ - Node_method - / | \ - name args stmt - """ - Node.append(["node_method", name, args, stmt]) - -def node_cmd_new(Node : list, cmd) -> None: - """ - Node_cmd - | - conmmand - """ - Node.append(["node_cmd", cmd]) - -def node_list_new(Node : list, name, list) -> None: - """ - Node_list - / \ - name list - """ - Node.append(["node_list", name, list]) - -def node_stack_new(Node : list, name) -> None: - """ - Node_stack - | - name - """ - Node.append(["node_stack", name]) - """ Parser for cantonese Token List """ diff --git "a/src/\350\257\255\346\263\225\346\240\221.py" "b/src/\350\257\255\346\263\225\346\240\221.py" new file mode 100644 index 
0000000..bed00e2 --- /dev/null +++ "b/src/\350\257\255\346\263\225\346\240\221.py" @@ -0,0 +1,209 @@ +""" + AST node for the Token List +""" +def node_print_new(Node : list, arg) -> None: + """ + Node_print + | + arg + """ + Node.append(["node_print", arg]) + +def node_sleep_new(Node : list, arg) -> None: + """ + Node_sleep + | + arg + """ + Node.append(["node_sleep", arg]) + +def node_break_new(Node : list) -> None: + Node.append(["node_break"]) + +def node_exit_new(Node : list) -> None: + """ + Node_exit + | + arg + """ + Node.append(["node_exit"]) + +def node_let_new(Node : list, key ,value) -> None: + """ + Node_let + / \ + key value + """ + Node.append(["node_let", key, value]) + +def node_if_new(Node : list, cond, stmt) -> None: + """ + Node_if + / \ + cond stmt + """ + Node.append(["node_if", cond, stmt]) + +def node_elif_new(Node : list, cond, stmt) -> None: + """ + Node_elif + / \ + cond stmt + """ + Node.append(["node_elif", cond, stmt]) + +def node_else_new(Node : list, stmt) -> None: + """ + Node_else + | + stmt + """ + Node.append(["node_else", stmt]) + +def node_loop_new(Node : list, cond, stmt) -> None: + """ + Node_loop + / \ + cond stmt + """ + Node.append(["node_loop", cond, stmt]) + +def node_func_new(Node : list, func_name, args, body) -> None: + """ + Node_fundef + / | \ + name args body + """ + Node.append(["node_fundef", func_name, args, body]) + +def node_call_new(Node : list, func_name) -> None: + """ + Node_call + | + name + """ + Node.append(["node_call", func_name]) + +def node_build_in_func_call_new(Node : list, var, func_name, args) -> None: + """ + Node_bcall + / \ + name args + """ + Node.append(["node_bcall", var, func_name, args]) + +def node_import_new(Node : list, name) -> None: + """ + Node_import + | + name + """ + Node.append(["node_import", name]) + +def node_return_new(Node : list, v) -> None: + """ + Node_return + | + value + """ + Node.append(["node_return", v]) + +def node_try_new(Node : list, try_part) -> None: + """ + 
Node_try + | + stmt + """ + Node.append(["node_try", try_part]) + +def node_except_new(Node : list, _except, except_part) -> None: + """ + Node_except + / \ + exception stmt + """ + Node.append(["node_except", _except, except_part]) + +def node_finally_new(Node : list, finally_part) -> None: + """ + Node_finally + | + stmt + """ + Node.append(["node_finally", finally_part]) + +def node_raise_new(Node : list, execption) -> None: + """ + Node_raise + | + exception + """ + Node.append(["node_raise", execption]) + +def node_for_new(Node : list, iterating_var, sequence, stmt_part) -> None: + """ + Node_for + / | \ + iter seq stmt + """ + Node.append(["node_for", iterating_var, sequence, stmt_part]) + +def node_turtle_new(Node : list, instruction) -> None: + Node.append(["node_turtle", instruction]) + +def node_assert_new(Node : list, args) -> None: + Node.append(["node_assert", args]) + +def node_model_new(Node : list, model, datatest) -> None: + """ + Node_model + / \ + model dataset + """ + Node.append(["node_model", model, datatest]) + +def node_gettype_new(Node : list, value) -> None: + Node.append(["node_gettype", value]) + +def node_class_new(Node : list, name, extend, method) -> None: + """ + Node_class + / | \ + name extend method + """ + Node.append(["node_class", name, extend, method]) + +def node_attribute_new(Node : list, attr_list) -> None: + Node.append(["node_attr", attr_list]) + +def node_method_new(Node : list, name, args, stmt) -> None: + """ + Node_method + / | \ + name args stmt + """ + Node.append(["node_method", name, args, stmt]) + +def node_cmd_new(Node : list, cmd) -> None: + """ + Node_cmd + | + conmmand + """ + Node.append(["node_cmd", cmd]) + +def node_list_new(Node : list, name, list) -> None: + """ + Node_list + / \ + name list + """ + Node.append(["node_list", name, list]) + +def node_stack_new(Node : list, name) -> None: + """ + Node_stack + | + name + """ + Node.append(["node_stack", name]) From d1313f9a9d578d46d38ee5a5b354a0ca43f850dd 
Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=90=B4=E7=83=9C?= Date: Thu, 29 Jul 2021 10:11:51 -0700 Subject: [PATCH 3/3] =?UTF-8?q?#25=20=E6=8B=86=E5=87=BA=E8=AF=AD=E6=B3=95?= =?UTF-8?q?=E5=88=86=E6=9E=90=E5=99=A8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/cantonese.py | 433 +----------------------------- "src/\350\257\255\346\263\225.py" | 432 +++++++++++++++++++++++++++++ 2 files changed, 434 insertions(+), 431 deletions(-) create mode 100644 "src/\350\257\255\346\263\225.py" diff --git a/src/cantonese.py b/src/cantonese.py index 608a7aa..6343f62 100644 --- a/src/cantonese.py +++ b/src/cantonese.py @@ -9,440 +9,11 @@ import os import argparse from 濑嘢 import 濑啲咩嘢 -from 词法 import * +from 词法 import cantonese_token, keywords +from 语法 import Parser from 语法树 import * from stack_vm import * -""" - Parser for cantonese Token List -""" -class Parser(object): - def __init__(self, tokens, Node): - self.tokens = tokens - self.pos = 0 - self.Node = Node - - def syntax_check(self, token, tag): - if tag == "value" and self.get(0)[1] == token: - return - elif tag == "type" and self.get(0)[0] == token: - return - else: - raise "Syntax error!" 
- - def get(self, offset): - if self.pos + offset >= len(self.tokens): - return ["", ""] - return self.tokens[self.pos + offset][1] - - def get_value(self, token): - if token[0] == 'expr': - # If is expr, Remove the "|" - token[1] = token[1][1 : -1] - if token[0] == 'callfunc': - # If is call func, Remove the '&' - token[1] = token[1][1 :] - return token - - def last(self, offset): - return self.tokens[self.pos - offset][1] - - def skip(self, offset): - self.pos += offset - - def match(self, name): - if self.get(0)[1] == name: - self.pos += 1 - return True - else: - return False - - def match_type(self, type): - if self.get(0)[0] == type: - self.pos += 1 - return True - else: - return False - - # TODO: Add error check - def parse(self): - while True: - if self.match(kw_print): - node_print_new(self.Node, self.get_value(self.get(0))) - self.skip(2) # Skip the args and end_print - - elif self.match("sleep"): - node_sleep_new(self.Node, self.get(0)) - self.skip(1) - - elif self.match(kw_exit) or self.match(kw_exit_1) or self.match(kw_exit_2): - node_exit_new(self.Node) - self.skip(1) - - elif self.match(kw_class_assign) and (self.get(1)[1] == kw_is or self.get(1)[1] == kw_is_2 or \ - self.get(1)[1] == kw_is_3): - node_let_new(self.Node, self.get_value(self.get(0)), self.get_value(self.get(2))) - self.skip(3) - - elif self.match(kw_assign) and (self.get(1)[1] == kw_is or self.get(1)[1] == kw_is_2 or \ - self.get(1)[1] == kw_is_3): - node_let_new(self.Node, self.get_value(self.get(0)), self.get_value(self.get(2))) - self.skip(3) - - elif self.match(kw_if): - cond = self.get_value(self.get(0)) - self.skip(4) # Skip the "then", "do", "begin" - if_case_end = 0 # The times of case "end" - if_should_end = 1 - node_if = [] - stmt_if = [] - while if_case_end != if_should_end and self.pos < len(self.tokens): - if self.get(0)[1] == kw_if: - if_should_end += 1 - stmt_if.append(self.tokens[self.pos]) - self.pos += 1 - elif self.get(0)[1] == kw_end: - if_case_end += 1 - if 
if_case_end != if_should_end: - stmt_if.append(self.tokens[self.pos]) - self.pos += 1 - elif self.get(0)[1] == kw_elif: - if_should_end += 1 - stmt_if.append(self.tokens[self.pos]) - self.pos += 1 - else: - stmt_if.append(self.tokens[self.pos]) - self.pos += 1 - Parser(stmt_if, node_if).parse() - node_if_new(self.Node, cond, node_if) - - elif self.match(kw_elif): # case "定系" elif - cond = self.get_value(self.get(0)) - self.skip(4) # Skip the "then", "do", "begin" - elif_case_end = 0 # The times of case "end" - elif_should_end = 1 - node_elif = [] - stmt_elif = [] - while elif_case_end != elif_should_end and self.pos < len(self.tokens): - if self.get(0)[1] == kw_if: - elif_should_end += 1 - stmt_elif.append(self.tokens[self.pos]) - self.pos += 1 - elif self.get(0)[1] == kw_end: - elif_case_end += 1 - if elif_case_end != elif_should_end: - stmt_elif.append(self.tokens[self.pos]) - self.pos += 1 - elif self.get(0)[1] == kw_elif: - elif_should_end += 1 - stmt_elif.append(self.tokens[self.pos]) - self.pos += 1 - else: - stmt_elif.append(self.tokens[self.pos]) - self.pos += 1 - Parser(stmt_elif, node_elif).parse() - node_elif_new(self.Node, cond, node_elif) - - elif self.match(kw_else_or_not): # case "唔系" else - self.skip(3) # Skip the "then", "do", "begin" - else_case_end = 0 # The times of case "end" - else_should_end = 1 - node_else = [] - stmt_else = [] - while else_case_end != else_should_end and self.pos < len(self.tokens): - if self.get(0)[1] == kw_if: - else_should_end += 1 - stmt_else.append(self.tokens[self.pos]) - self.pos += 1 - elif self.get(0)[1] == kw_end: - else_case_end += 1 - if else_case_end != else_should_end: - stmt_else.append(self.tokens[self.pos]) - self.pos += 1 - elif self.get(0)[1] == kw_elif: - else_should_end += 1 - stmt_else.append(self.tokens[self.pos]) - self.pos += 1 - else: - stmt_else.append(self.tokens[self.pos]) - self.pos += 1 - Parser(stmt_else, node_else).parse() - node_else_new(self.Node, node_else) - - elif 
self.match(kw_while_do): - stmt = [] - while self.tokens[self.pos][1][1] != kw_while: - stmt.append(self.tokens[self.pos]) - self.pos += 1 - node_while = [] - self.skip(1) - cond = self.get_value(self.get(0)) - Parser(stmt, node_while).parse() - node_loop_new(self.Node, cond, node_while) - self.skip(2) # Skip the "end" - - elif self.match(kw_function): # Case "function" - if self.get(1)[0] == 'expr': - func_name = self.get_value(self.get(0)) - args = self.get_value(self.get(1)) - self.skip(3) - func_stmt = [] - while self.tokens[self.pos][1][1] != kw_func_end: - func_stmt.append(self.tokens[self.pos]) - self.pos += 1 - node_func = [] - Parser(func_stmt, node_func).parse() - node_func_new(self.Node, func_name, args, node_func) - self.skip(1) # Skip the funcend - else: - func_name = self.get_value(self.get(0)) - self.skip(2) # Skip the funcbegin - func_stmt = [] - while self.tokens[self.pos][1][1] != kw_func_end: - func_stmt.append(self.tokens[self.pos]) - self.pos += 1 - node_func = [] - Parser(func_stmt, node_func).parse() - node_func_new(self.Node, func_name, "None", node_func) - self.skip(1) # Skip the funcend - - elif self.match(kw_turtle_beg): - self.skip(2) # Skip the "do", "begin" - turtle_inst = [] - while self.tokens[self.pos][1][1] != kw_end: - if self.tokens[self.pos][1][0] == 'identifier': - pass - else: - turtle_inst.append(self.get_value(self.tokens[self.pos][1])[1]) - self.pos += 1 - node_turtle_new(self.Node, turtle_inst) - self.skip(1) - - elif self.match(kw_call): - node_call_new(self.Node, self.get_value(self.get(0))) - self.skip(1) - - elif self.match(kw_import): - node_import_new(self.Node, self.get_value(self.get(0))) - self.skip(1) - - elif self.match_type("expr") or self.match_type("identifier"): - if self.match(kw_from): - iterating_var = self.get_value(self.get(-2)) - seq = "(" + str(self.get_value(self.get(0))[1]) + "," \ - + str(self.get_value(self.get(2))[1]) + ")" - self.skip(3) - node_for = [] - for_stmt = [] - for_case_end = 0 - 
for_should_end = 1 - while for_should_end != for_case_end and self.pos < len(self.tokens): - if (self.get(0)[0] == "expr" or self.get(0)[0] == "identifier") \ - and self.get(1)[1] == kw_from: - for_should_end += 1 - for_stmt.append(self.tokens[self.pos]) - self.pos += 1 - elif self.get(0)[1] == kw_endfor: - for_case_end += 1 - if for_case_end != for_should_end: - for_stmt.append(self.tokens[self.pos]) - self.pos += 1 - else: - for_stmt.append(self.tokens[self.pos]) - self.pos += 1 - Parser(for_stmt, node_for).parse() - node_for_new(self.Node, iterating_var, seq, node_for) - if self.get(0)[1] == kw_lst_assign: - self.skip(1) - list = self.get_value(self.get(-2)) - name = self.get_value(self.get(1)) - node_list_new(self.Node, name, list) - self.skip(2) - - if self.get(0)[1] == kw_do: - self.skip(1) - id = self.get_value(self.get(-2)) - args = self.get_value(self.get(1)) - func = self.get_value(self.get(0)) - node_build_in_func_call_new(self.Node, id, func, args) - self.skip(2) - if self.get(0)[1] == kw_call_begin: - func_name = self.get_value(self.get(-1)) - self.skip(2) - args = self.get_value(self.get(0)) - cons = ['expr', func_name[1] + '(' + args[1] + ')'] - self.skip(1) - if self.get(0)[1] == kw_get_value: - self.skip(1) - v = self.get_value(self.get(0)) - node_let_new(self.Node, v, cons) - else: - node_call_new(self.Node, cons) - - elif self.match(kw_return): - node_return_new(self.Node, self.get_value(self.get(0))) - self.skip(1) - - elif self.match(kw_try): - self.skip(2) # SKip the "begin, do" - should_end = 1 - case_end = 0 - node_try = [] - stmt_try = [] - while case_end != should_end and self.pos < len(self.tokens): - if self.get(0)[1] == kw_end: - case_end += 1 - self.pos += 1 - else: - stmt_try.append(self.tokens[self.pos]) - self.pos += 1 - Parser(stmt_try, node_try).parse() - node_try_new(self.Node, node_try) - - elif self.match(kw_except): - _except = self.get_value(self.get(0)) - self.skip(4) # SKip the "except", "then", "begin", "do" - should_end = 
1 - case_end = 0 - node_except = [] - stmt_except = [] - while case_end != should_end and self.pos < len(self.tokens): - if self.get(0)[1] == kw_end: - case_end += 1 - self.pos += 1 - else: - stmt_except.append(self.tokens[self.pos]) - self.pos += 1 - Parser(stmt_except, node_except).parse() - node_except_new(self.Node, _except , node_except) - - elif self.match(kw_finally): - self.skip(2) # Skip the "begin", "do" - should_end = 1 - case_end = 0 - node_finally = [] - stmt_finally = [] - while case_end != should_end and self.pos < len(self.tokens): - if self.get(0)[1] == kw_end: - case_end += 1 - self.pos += 1 - else: - stmt_finally.append(self.tokens[self.pos]) - self.pos += 1 - Parser(stmt_finally, node_finally).parse() - node_finally_new(self.Node, node_finally) - - elif self.match(kw_assert): - node_assert_new(self.Node, self.get_value(self.get(0))) - self.skip(1) - - elif self.match(kw_raise): - node_raise_new(self.Node, self.get_value(self.get(0))) - self.skip(2) - - elif self.match(kw_type): - node_gettype_new(self.Node, self.get_value(self.get(0))) - self.skip(1) - - elif self.match(kw_pass): - self.Node.append(["node_pass"]) - - elif self.match(kw_break): - node_break_new(self.Node) - - elif self.match(kw_class_def): - class_name = self.get_value(self.get(0)) - self.skip(1) - if self.match(kw_extend): - extend = self.get_value(self.get(0)) - self.skip(1) - class_stmt = [] - node_class = [] - while self.tokens[self.pos][1][1] != kw_endclass: - class_stmt.append(self.tokens[self.pos]) - self.pos += 1 - Parser(class_stmt, node_class).parse() - self.skip(1) # Skip the "end" - node_class_new(self.Node, class_name, extend, node_class) - - elif self.match(kw_class_init): - self.skip(1) - attr_lst = self.get_value(self.get(0)) - self.skip(1) - node_attribute_new(self.Node, attr_lst) - - elif self.match(kw_method): - method_name = self.get_value(self.get(0)) - self.skip(1) - # Check if has args - if self.get(0)[0] == "expr": - args = self.get_value(self.get(0)) - 
self.skip(1) - else: - args = "None" - self.skip(2) # Skip the "do", "begin" - method_stmt = [] - node_method = [] - method_should_end = 1 - method_case_end = 0 - while method_case_end != method_should_end and self.pos < len(self.tokens): - if self.get(0)[1] == kw_end: - method_case_end += 1 - if method_case_end != method_should_end: - method_stmt.append(self.tokens[self.pos]) - self.pos += 1 - elif self.get(0)[1] == kw_if: - method_should_end += 1 - method_stmt.append(self.tokens[self.pos]) - self.pos += 1 - elif self.get(0)[1] == kw_elif: - method_should_end += 1 - method_stmt.append(self.tokens[self.pos]) - self.pos += 1 - elif self.get(0)[1] == kw_else_or_not: - method_should_end += 1 - method_stmt.append(self.tokens[self.pos]) - self.pos += 1 - else: - method_stmt.append(self.tokens[self.pos]) - self.pos += 1 - Parser(method_stmt, node_method).parse() - node_method_new(self.Node, method_name, args, node_method) - - elif self.match(kw_cmd): - node_cmd_new(self.Node, self.get_value(self.get(0))) - self.skip(1) - - elif self.match(kw_model): - model = self.get_value(self.get(0)) - self.skip(1) - self.syntax_check(kw_mod_new, "value") - self.skip(2) - datatest = self.get_value(self.get(0)) - self.skip(1) - node_model_new(self.Node, model, datatest) - - elif self.match(kw_stackinit): - node_stack_new(self.Node, self.get_value(self.get(0))) - self.skip(1) - - elif self.match(kw_push): - self.syntax_check(kw_do, "value") - self.skip(1) - self.Node.append(["stack_push", self.get_value(self.get(0)), self.get_value(self.\ - get(1))]) - self.skip(2) - - elif self.match(kw_pop): - self.syntax_check(kw_do, "value") - self.skip(1) - self.Node.append(["stack_pop", self.get_value(self.get(0)), self.get_value(self.\ - get(1))]) - self.skip(1) - - else: - break - variable = {} TO_PY_CODE = "" use_tradition = False # 是否使用繁体 diff --git "a/src/\350\257\255\346\263\225.py" "b/src/\350\257\255\346\263\225.py" new file mode 100644 index 0000000..b2ba52a --- /dev/null +++ 
class Parser(object):
    """Turn a Cantonese token list into a list of syntax-tree nodes.

    Every token is read as ``token[1]``, a two-item list ``[type, value]``
    (``token[0]`` is never inspected here -- presumably a line number,
    TODO confirm against the lexer).

    ``parse`` relies on the ``kw_*`` keyword constants and the
    ``node_*_new`` builder functions that the enclosing module star-imports
    from the ``词法`` and ``语法树`` modules.
    """

    def __init__(self, tokens, Node):
        """Store the token list and the output node list.

        Args:
            tokens: list of tokens to consume.
            Node: list that receives the generated nodes (mutated in place).
        """
        self.tokens = tokens
        self.pos = 0
        self.Node = Node

    def syntax_check(self, token, tag):
        """Check the current token against an expected value or type.

        Args:
            token: expected value (``tag == "value"``) or expected type
                (``tag == "type"``).
            tag: which half of the current token to compare.

        Raises:
            SyntaxError: if the current token does not match.
        """
        if tag == "value" and self.get(0)[1] == token:
            return
        if tag == "type" and self.get(0)[0] == token:
            return
        # The original raised a bare string, which is itself a TypeError in
        # Python 3; raise a real exception so the message reaches the user.
        raise SyntaxError("Syntax error!")

    def get(self, offset):
        """Return the ``[type, value]`` pair ``offset`` tokens from the cursor.

        An index outside the token list (on either side) yields the
        placeholder ``["", ""]`` instead of raising or wrapping around.
        """
        index = self.pos + offset
        if index < 0 or index >= len(self.tokens):
            return ["", ""]
        return self.tokens[index][1]

    def get_value(self, token):
        """Strip the lexer delimiters from ``token`` in place and return it.

        ``expr`` tokens lose their first and last character (the ``|``
        delimiters); ``callfunc`` tokens lose their leading ``&``.
        NOTE: the token is mutated, so calling this twice on the same token
        strips it twice.
        """
        if token[0] == 'expr':
            token[1] = token[1][1:-1]
        if token[0] == 'callfunc':
            token[1] = token[1][1:]
        return token

    def last(self, offset):
        """Return the ``[type, value]`` pair ``offset`` tokens behind the cursor."""
        return self.tokens[self.pos - offset][1]

    def skip(self, offset):
        """Advance the cursor by ``offset`` tokens."""
        self.pos += offset

    def match(self, name):
        """Consume the current token if its value equals ``name``.

        Returns:
            True when the token was consumed, False otherwise.
        """
        if self.get(0)[1] == name:
            self.pos += 1
            return True
        return False

    def match_type(self, type):
        """Consume the current token if its type equals ``type``.

        Returns:
            True when the token was consumed, False otherwise.
        """
        if self.get(0)[0] == type:
            self.pos += 1
            return True
        return False

    def _collect_block(self, openers=()):
        """Collect raw tokens up to the ``kw_end`` closing the current block.

        Every token whose value is in ``openers`` opens a nested block and
        therefore requires one more ``kw_end``.  Nested ``kw_end`` tokens are
        kept in the result; the final one is consumed but not returned.
        Collection also stops when the token list is exhausted.
        """
        collected = []
        closed = 0
        expected = 1
        while closed != expected and self.pos < len(self.tokens):
            value = self.get(0)[1]
            if value == kw_end:
                closed += 1
                if closed != expected:
                    collected.append(self.tokens[self.pos])
            else:
                if value in openers:
                    expected += 1
                collected.append(self.tokens[self.pos])
            self.pos += 1
        return collected

    # TODO: Add error check
    def parse(self):
        """Consume tokens statement by statement, appending nodes to ``self.Node``.

        Parsing stops at the first token that starts no known statement
        (including the ``["", ""]`` placeholder returned past the end).
        Block bodies are parsed recursively with a fresh ``Parser``.
        """
        is_keywords = (kw_is, kw_is_2, kw_is_3)
        while True:
            if self.match(kw_print):
                node_print_new(self.Node, self.get_value(self.get(0)))
                self.skip(2)  # Skip the args and end_print

            elif self.match("sleep"):
                node_sleep_new(self.Node, self.get(0))
                self.skip(1)

            elif self.match(kw_exit) or self.match(kw_exit_1) or self.match(kw_exit_2):
                node_exit_new(self.Node)
                self.skip(1)

            # NOTE(review): match() consumes the keyword even when the
            # following "is" check fails -- confirm this is intended.
            elif self.match(kw_class_assign) and self.get(1)[1] in is_keywords:
                node_let_new(self.Node, self.get_value(self.get(0)),
                             self.get_value(self.get(2)))
                self.skip(3)

            elif self.match(kw_assign) and self.get(1)[1] in is_keywords:
                node_let_new(self.Node, self.get_value(self.get(0)),
                             self.get_value(self.get(2)))
                self.skip(3)

            elif self.match(kw_if):
                cond = self.get_value(self.get(0))
                self.skip(4)  # Skip the condition, "then", "do", "begin"
                stmt_if = self._collect_block((kw_if, kw_elif))
                node_if = []
                Parser(stmt_if, node_if).parse()
                node_if_new(self.Node, cond, node_if)

            elif self.match(kw_elif):  # case "定系" elif
                cond = self.get_value(self.get(0))
                self.skip(4)  # Skip the condition, "then", "do", "begin"
                stmt_elif = self._collect_block((kw_if, kw_elif))
                node_elif = []
                Parser(stmt_elif, node_elif).parse()
                node_elif_new(self.Node, cond, node_elif)

            elif self.match(kw_else_or_not):  # case "唔系" else
                self.skip(3)  # Skip the "then", "do", "begin"
                stmt_else = self._collect_block((kw_if, kw_elif))
                node_else = []
                Parser(stmt_else, node_else).parse()
                node_else_new(self.Node, node_else)

            elif self.match(kw_while_do):
                # The loop body comes first; the condition follows kw_while.
                stmt = []
                while self.tokens[self.pos][1][1] != kw_while:
                    stmt.append(self.tokens[self.pos])
                    self.pos += 1
                node_while = []
                self.skip(1)
                cond = self.get_value(self.get(0))
                Parser(stmt, node_while).parse()
                node_loop_new(self.Node, cond, node_while)
                self.skip(2)  # Skip the condition and the "end"

            elif self.match(kw_function):  # Case "function"
                if self.get(1)[0] == 'expr':
                    # Function with an argument list.
                    func_name = self.get_value(self.get(0))
                    args = self.get_value(self.get(1))
                    self.skip(3)
                    func_stmt = []
                    while self.tokens[self.pos][1][1] != kw_func_end:
                        func_stmt.append(self.tokens[self.pos])
                        self.pos += 1
                    node_func = []
                    Parser(func_stmt, node_func).parse()
                    node_func_new(self.Node, func_name, args, node_func)
                    self.skip(1)  # Skip the funcend
                else:
                    func_name = self.get_value(self.get(0))
                    self.skip(2)  # Skip the funcbegin
                    func_stmt = []
                    while self.tokens[self.pos][1][1] != kw_func_end:
                        func_stmt.append(self.tokens[self.pos])
                        self.pos += 1
                    node_func = []
                    Parser(func_stmt, node_func).parse()
                    node_func_new(self.Node, func_name, "None", node_func)
                    self.skip(1)  # Skip the funcend

            elif self.match(kw_turtle_beg):
                self.skip(2)  # Skip the "do", "begin"
                turtle_inst = []
                while self.tokens[self.pos][1][1] != kw_end:
                    # Identifier tokens are ignored inside a turtle block.
                    if self.tokens[self.pos][1][0] != 'identifier':
                        turtle_inst.append(self.get_value(self.tokens[self.pos][1])[1])
                    self.pos += 1
                node_turtle_new(self.Node, turtle_inst)
                self.skip(1)

            elif self.match(kw_call):
                node_call_new(self.Node, self.get_value(self.get(0)))
                self.skip(1)

            elif self.match(kw_import):
                node_import_new(self.Node, self.get_value(self.get(0)))
                self.skip(1)

            elif self.match_type("expr") or self.match_type("identifier"):
                if self.match(kw_from):
                    # "<var> from <a> to <b> ... endfor" loop.
                    iterating_var = self.get_value(self.get(-2))
                    seq = "(" + str(self.get_value(self.get(0))[1]) + "," \
                          + str(self.get_value(self.get(2))[1]) + ")"
                    self.skip(3)
                    node_for = []
                    for_stmt = []
                    for_case_end = 0
                    for_should_end = 1
                    while for_should_end != for_case_end and self.pos < len(self.tokens):
                        if (self.get(0)[0] == "expr" or self.get(0)[0] == "identifier") \
                                and self.get(1)[1] == kw_from:
                            # A nested for loop needs its own kw_endfor.
                            for_should_end += 1
                            for_stmt.append(self.tokens[self.pos])
                            self.pos += 1
                        elif self.get(0)[1] == kw_endfor:
                            for_case_end += 1
                            if for_case_end != for_should_end:
                                for_stmt.append(self.tokens[self.pos])
                            self.pos += 1
                        else:
                            for_stmt.append(self.tokens[self.pos])
                            self.pos += 1
                    Parser(for_stmt, node_for).parse()
                    node_for_new(self.Node, iterating_var, seq, node_for)
                if self.get(0)[1] == kw_lst_assign:
                    self.skip(1)
                    lst = self.get_value(self.get(-2))
                    name = self.get_value(self.get(1))
                    node_list_new(self.Node, name, lst)
                    self.skip(2)

                if self.get(0)[1] == kw_do:
                    self.skip(1)
                    target = self.get_value(self.get(-2))
                    args = self.get_value(self.get(1))
                    func = self.get_value(self.get(0))
                    node_build_in_func_call_new(self.Node, target, func, args)
                    self.skip(2)
                if self.get(0)[1] == kw_call_begin:
                    func_name = self.get_value(self.get(-1))
                    self.skip(2)
                    args = self.get_value(self.get(0))
                    cons = ['expr', func_name[1] + '(' + args[1] + ')']
                    self.skip(1)
                    if self.get(0)[1] == kw_get_value:
                        # The call result is assigned to a variable.
                        self.skip(1)
                        v = self.get_value(self.get(0))
                        node_let_new(self.Node, v, cons)
                    else:
                        node_call_new(self.Node, cons)

            elif self.match(kw_return):
                node_return_new(self.Node, self.get_value(self.get(0)))
                self.skip(1)

            elif self.match(kw_try):
                self.skip(2)  # Skip the "begin", "do"
                stmt_try = self._collect_block()
                node_try = []
                Parser(stmt_try, node_try).parse()
                node_try_new(self.Node, node_try)

            elif self.match(kw_except):
                _except = self.get_value(self.get(0))
                self.skip(4)  # Skip the "except", "then", "begin", "do"
                stmt_except = self._collect_block()
                node_except = []
                Parser(stmt_except, node_except).parse()
                node_except_new(self.Node, _except, node_except)

            elif self.match(kw_finally):
                self.skip(2)  # Skip the "begin", "do"
                stmt_finally = self._collect_block()
                node_finally = []
                Parser(stmt_finally, node_finally).parse()
                node_finally_new(self.Node, node_finally)

            elif self.match(kw_assert):
                node_assert_new(self.Node, self.get_value(self.get(0)))
                self.skip(1)

            elif self.match(kw_raise):
                node_raise_new(self.Node, self.get_value(self.get(0)))
                self.skip(2)

            elif self.match(kw_type):
                node_gettype_new(self.Node, self.get_value(self.get(0)))
                self.skip(1)

            elif self.match(kw_pass):
                self.Node.append(["node_pass"])

            elif self.match(kw_break):
                node_break_new(self.Node)

            elif self.match(kw_class_def):
                class_name = self.get_value(self.get(0))
                self.skip(1)
                # NOTE(review): `extend` is only bound when kw_extend is
                # present; a class without it fails below -- confirm whether
                # the grammar always requires a parent class.
                if self.match(kw_extend):
                    extend = self.get_value(self.get(0))
                    self.skip(1)
                class_stmt = []
                node_class = []
                while self.tokens[self.pos][1][1] != kw_endclass:
                    class_stmt.append(self.tokens[self.pos])
                    self.pos += 1
                Parser(class_stmt, node_class).parse()
                self.skip(1)  # Skip the "end"
                node_class_new(self.Node, class_name, extend, node_class)

            elif self.match(kw_class_init):
                self.skip(1)
                attr_lst = self.get_value(self.get(0))
                self.skip(1)
                node_attribute_new(self.Node, attr_lst)

            elif self.match(kw_method):
                method_name = self.get_value(self.get(0))
                self.skip(1)
                # Check if has args
                if self.get(0)[0] == "expr":
                    args = self.get_value(self.get(0))
                    self.skip(1)
                else:
                    args = "None"
                self.skip(2)  # Skip the "do", "begin"
                method_stmt = self._collect_block((kw_if, kw_elif, kw_else_or_not))
                node_method = []
                Parser(method_stmt, node_method).parse()
                node_method_new(self.Node, method_name, args, node_method)

            elif self.match(kw_cmd):
                node_cmd_new(self.Node, self.get_value(self.get(0)))
                self.skip(1)

            elif self.match(kw_model):
                model = self.get_value(self.get(0))
                self.skip(1)
                self.syntax_check(kw_mod_new, "value")
                self.skip(2)
                datatest = self.get_value(self.get(0))
                self.skip(1)
                node_model_new(self.Node, model, datatest)

            elif self.match(kw_stackinit):
                node_stack_new(self.Node, self.get_value(self.get(0)))
                self.skip(1)

            elif self.match(kw_push):
                self.syntax_check(kw_do, "value")
                self.skip(1)
                self.Node.append(["stack_push", self.get_value(self.get(0)),
                                  self.get_value(self.get(1))])
                self.skip(2)

            elif self.match(kw_pop):
                self.syntax_check(kw_do, "value")
                self.skip(1)
                self.Node.append(["stack_pop", self.get_value(self.get(0)),
                                  self.get_value(self.get(1))])
                self.skip(1)

            else:
                break