-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathFleaLexer.py
116 lines (92 loc) · 3.32 KB
/
FleaLexer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
"""
TODO: take "inquotes = False" common
"""
# Operation table: maps each command word to a string that presumably encodes
# the expected argument-token pattern ("/" separating alternatives) — the
# templates are not consumed in this file, so verify against the parser.
# None means the command takes no arguments.
op_keywords = {
    "add": "[NUMBER][SUBSTRING]",
    "remove": "[NUMBER][BAREWORD]",
    "prioritize": "[NUMBER][NUMBER]",
    "edit": "[NUMBER][NEWVALUE][COLON][NUMBER][COMMA][SUBSTRING]",
    "move": "[NUMBER][BAREWORD][NUMBER][BAREWORD]",
    "display": "[BAREWORD]/[ALL]",
    "save": "[BAREWORD]/[ALL]",
    "exit": None,
    "help": None
}
# Lexer keyword table: maps a recognized source word to its token kind.
KEYWORDS = {
    "to": "NEWVALUE",
    "all": "ALL"
}
# Every operation name is also a keyword whose token kind is its own
# upper-cased spelling. Only the keys matter here, so iterate them directly
# instead of unpacking (word, args) pairs and ignoring args.
KEYWORDS.update({word: word.upper() for word in op_keywords})
def tokenize(text, prefix):
    """Tokenize one command line into a tuple of [kind, text] pairs.

    Parameters:
        text: the raw command string; its first character must equal *prefix*.
        prefix: the command-prefix character (e.g. '/').

    Returns:
        A tuple of two-element lists ``[kind, token_text]`` where kind is one
        of PREFIX, COLON, COMMA, NUMBER, SUBSTRING, BAREWORD, or an
        upper-cased entry from the module-level KEYWORDS table.

    Raises:
        ValueError: for a wrong prefix, an unknown lowercase word, an
        unterminated quoted string, or any other illegal character.
    """
    text += '\n'  # sentinel: every real token now has a following character
    token_txt: str = ""
    t_len: int = 0          # length of the multi-char token being scanned
    kind: str = ""
    inquotes: bool = False  # True while inside a double-quoted string
    stored_tokens: list = []
    if text[0] != prefix:
        raise ValueError(f"Invalid prefix! The defined prefix is {prefix}")
    # pos is 1-based, so text[pos] is the character AFTER the current one.
    # NOTE: the original used match/case with capture patterns
    # (``case prefix if ...``), which silently rebound the ``prefix``
    # parameter; an explicit if/elif chain avoids that pitfall while
    # preserving the original case order.
    for pos, char in enumerate(text, start=1):
        kind = "ILLEGAL"
        if pos == 1 and not inquotes:
            # First character was already verified to equal the prefix.
            kind = "PREFIX"
            token_txt = char
        elif char == ':' and not inquotes:
            kind = "COLON"
            token_txt = char
        elif char == ',' and not inquotes:
            kind = "COMMA"
            token_txt = char
        elif char.isalpha() and not inquotes:
            t_len += 1
            kind = None
            # Word ends when the next character is not a letter (the '\n'
            # sentinel guarantees this fires for a trailing word).
            if pos < len(text) and not text[pos].isalpha():
                token_txt = text[pos - t_len: pos]
                if token_txt in KEYWORDS:
                    kind = KEYWORDS[token_txt]
                if token_txt[0].isupper():
                    # Capitalized words are user identifiers (barewords).
                    kind = "BAREWORD"
                if not (token_txt in KEYWORDS or token_txt[0].isupper()):
                    raise ValueError(f"Invalid Keyword: {token_txt}")
                t_len = 0
        elif char == '"':
            # Quote characters toggle string mode and emit no token.
            inquotes = not inquotes
        elif inquotes:
            t_len += 1
            kind = None
            if pos < len(text) and text[pos] == '"':
                kind = "SUBSTRING"
                token_txt = text[pos - t_len: pos]
                t_len = 0
            if pos == len(text):
                raise ValueError("Unterminated String")
        elif char.isdigit():
            t_len += 1
            kind = None
            # BUGFIX: the original emitted one NUMBER per digit (its
            # ``pos <= len(text)`` guard was always true), so "12" lexed
            # as two tokens. Accumulate the whole digit run instead.
            if pos < len(text) and not text[pos].isdigit():
                kind = "NUMBER"
                token_txt = text[pos - t_len: pos]
                t_len = 0
        if not char.isspace() and kind == "ILLEGAL" and char != '"':
            # (The original also excluded ',' here, but a comma can never
            # reach this point: it is consumed by the COMMA or string case.)
            t_len += 1
            token_txt = text[pos - t_len: pos]
            t_len = 0
            raise ValueError(f"Illegal token '{token_txt}'")
        if kind is not None and kind != "ILLEGAL":
            stored_tokens.append([kind, token_txt])
            kind = None
    return tuple(stored_tokens)
if __name__ == "__main__":
while True:
command = input('> ')
try:
print(tokenize(text = command, prefix = '/'))
except ValueError as e:
print(str(e))