-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathparser.bison
365 lines (312 loc) · 9.81 KB
/
parser.bison
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
/*
Declare token types at the top of the bison file,
causing them to be automatically generated in parser.tab.h
for use by scanner.c.
*/
%token TOKEN_ARRAY
%token TOKEN_BOOLEAN
%token TOKEN_CHAR
%token TOKEN_ELSE
%token TOKEN_FALSE
%token TOKEN_FOR
%token TOKEN_FUNCTION
%token TOKEN_IF
%token TOKEN_INTEGER
%token TOKEN_PRINT
%token TOKEN_RETURN
%token TOKEN_STRING
%token TOKEN_TRUE
%token TOKEN_VOID
%token TOKEN_WHILE
%token <str> TOKEN_IDENT
%token <n> TOKEN_INTEGER_LITERAL
%token <str> TOKEN_CHAR_LITERAL
%token <str> TOKEN_STRING_LITERAL
%token TOKEN_COMMENT_C
%token TOKEN_COMMENT_CPLUSPLUS
%token TOKEN_OP_LEFTPARENTHESS
%token TOKEN_OP_RIGHTPARENTHESS
%token TOKEN_OP_LEFTBRACKET
%token TOKEN_OP_RIGHTBRACKET
%token TOKEN_OP_INCREMENT
%token TOKEN_OP_DECREMENT
%token TOKEN_OP_NOT
%token TOKEN_OP_POWER
%token TOKEN_OP_MUL
%token TOKEN_OP_DIV
%token TOKEN_OP_MOD
%token TOKEN_OP_ADD
%token TOKEN_OP_SUB
%token TOKEN_OP_LE
%token TOKEN_OP_LT
%token TOKEN_OP_GE
%token TOKEN_OP_GT
%token TOKEN_OP_EQ
%token TOKEN_OP_UNEQ
%token TOKEN_OP_AND
%token TOKEN_OP_OR
%token TOKEN_OP_ASSIGN
%token TOKEN_LEFTCURLY
%token TOKEN_RIGHTCURLY
%token TOKEN_COLON
%token TOKEN_COMMA
%token TOKEN_SEMICOLON
%token TOKEN_ERROR
%union {
struct decl *decl;
struct stmt *stmt;
struct type *type;
struct param_list *params;
struct expr *expr;
char *str;
int n;
}
%type <decl> decl func_definition external_decl translation_unit program
%type <stmt> print_stmt return_stmt compound_stmt expr_stmt unmatched_stmt matched_stmt stmt stmt_list
%type <type> type func_type
%type <params> param param_list param_list_opt
%type <expr> constant primary_expr postfix_expr increment_expr unary_expr power_expr mul_expr add_expr relational_expr logical_and_expr logical_or_expr assignment_expr expr expr_opt expr_list expr_list_opt initializer initializer_list
%type <n> unary_operator
%{
#include <stdio.h>
#include "type.h"
#include "expr.h"
#include "stmt.h"
#include "decl.h"
/*
YYSTYPE cannot be used together with %union
#define YYSTYPE char *
*/
/*
Clunky: Manually declare the interface to the scanner generated by flex.
*/
extern char *yytext;
extern int yylex();
extern int yylineno;
extern int yyerror( char *str );
/*
Clunky: Keep the final result of the parse in a global variable,
so that it can be retrieved by main().
*/
struct decl *program = 0;
%}
%%
/* Here is the grammar: translation_unit is the start symbol. */
/* an empty files is legal in cminor: translation_unit can be empty */
program: translation_unit
{ program = $1; return 0; }
;
translation_unit: external_decl translation_unit
{ $1->next = $2; $$ = $1; }
| /* empty */
{ $$ = 0; }
;
external_decl: decl
{ $$ = $1; }
| func_definition
{ $$ = $1; }
;
func_definition: TOKEN_IDENT TOKEN_COLON func_type TOKEN_OP_ASSIGN compound_stmt
{ $$ = decl_create($1, $3, 0, $5, yylineno, 0); }
;
decl: TOKEN_IDENT TOKEN_COLON type TOKEN_OP_ASSIGN initializer TOKEN_SEMICOLON /* declaration with initialization */
{ $$ = decl_create($1, $3, $5, 0, yylineno, 0); }
| TOKEN_IDENT TOKEN_COLON type TOKEN_SEMICOLON /* declaration without initialization */
{ $$ = decl_create($1, $3, 0, 0, yylineno, 0); }
| TOKEN_IDENT TOKEN_COLON func_type TOKEN_SEMICOLON /* function prototype */
{ $$ = decl_create($1, $3, 0, 0, yylineno, 0); }
;
func_type: TOKEN_FUNCTION type TOKEN_OP_LEFTPARENTHESS param_list_opt TOKEN_OP_RIGHTPARENTHESS
{ $$ = type_create(TYPE_FUNCTION, $4, 0, $2, yylineno, 1); }
;
initializer: expr
{ $$ = $1; }
| TOKEN_LEFTCURLY initializer_list TOKEN_RIGHTCURLY
{ $$ = expr_create(EXPR_LEFTCURLY, 0, $2, yylineno); }
;
initializer_list: initializer
{ $$ = $1; }
| initializer_list TOKEN_COMMA initializer
{ $$ = expr_create(EXPR_COMMA, $1, $3, yylineno); }
;
param_list_opt: /* empty */
{ $$ = 0; }
| param_list
{ $$ = $1; }
;
param_list: param
{ $$ = $1; }
| param TOKEN_COMMA param_list /* */
{ $1->next = $3; $$ = $1; }
;
param: TOKEN_IDENT TOKEN_COLON type
{ $$ = param_list_create($1, $3, 0, yylineno); }
type: TOKEN_INTEGER
{ $$ = type_create(TYPE_INTEGER, 0, 0, 0, yylineno, 1); }
| TOKEN_CHAR
{ $$ = type_create(TYPE_CHARACTER, 0, 0, 0, yylineno, 1); }
| TOKEN_BOOLEAN
{ $$ = type_create(TYPE_BOOLEAN, 0, 0, 0, yylineno, 1); }
| TOKEN_STRING
{ $$ = type_create(TYPE_STRING, 0, 0, 0, yylineno, 1); }
| TOKEN_VOID
{ $$ = type_create(TYPE_VOID, 0, 0, 0, yylineno, 1); }
| TOKEN_ARRAY TOKEN_OP_LEFTBRACKET TOKEN_OP_RIGHTBRACKET type
{ $$ = type_create(TYPE_ARRAY, 0, 0, $4, yylineno, 1); }
| TOKEN_ARRAY TOKEN_OP_LEFTBRACKET logical_or_expr TOKEN_OP_RIGHTBRACKET type
{ $$ = type_create(TYPE_ARRAY, 0, $3, $5, yylineno, 1); }
;
stmt_list: /* empty */
{ $$ = 0; }
| stmt stmt_list
{ $1->next = $2; $$ = $1; }
;
stmt: matched_stmt
{ $$ = $1; }
| unmatched_stmt
{ $$ = $1; }
;
matched_stmt: external_decl
{ $$ = stmt_create(STMT_DECL, $1, 0, 0, 0, 0, 0, yylineno, 0); }
| expr_stmt
{ $$ = $1; }
| compound_stmt
{ $$ = $1; }
| return_stmt
{ $$ = $1; }
| print_stmt
{ $$ = $1; }
| TOKEN_FOR TOKEN_OP_LEFTPARENTHESS expr_opt TOKEN_SEMICOLON expr_opt TOKEN_SEMICOLON expr_opt TOKEN_OP_RIGHTPARENTHESS matched_stmt
{ $$ = stmt_create(STMT_FOR, 0, $3, $5, $7, $9, 0, yylineno, 0); }
| TOKEN_IF TOKEN_OP_LEFTPARENTHESS expr TOKEN_OP_RIGHTPARENTHESS matched_stmt TOKEN_ELSE matched_stmt
{ $$ = stmt_create(STMT_IF_ELSE, 0, 0, $3, 0, $5, $7, yylineno, 0); }
;
unmatched_stmt: TOKEN_IF TOKEN_OP_LEFTPARENTHESS expr TOKEN_OP_RIGHTPARENTHESS stmt
{ $$ = stmt_create(STMT_IF_ELSE, 0, 0, $3, 0, $5, 0, yylineno, 0); }
| TOKEN_IF TOKEN_OP_LEFTPARENTHESS expr TOKEN_OP_RIGHTPARENTHESS matched_stmt TOKEN_ELSE unmatched_stmt
{ $$ = stmt_create(STMT_IF_ELSE, 0, 0, $3, 0, $5, $7, yylineno, 0); }
| TOKEN_FOR TOKEN_OP_LEFTPARENTHESS expr_opt TOKEN_SEMICOLON expr_opt TOKEN_SEMICOLON expr_opt TOKEN_OP_RIGHTPARENTHESS unmatched_stmt
{ $$ = stmt_create(STMT_FOR, 0, $3, $5, $7, $9, 0, yylineno, 0); }
;
expr_stmt: expr TOKEN_SEMICOLON
{ $$ = stmt_create(STMT_EXPR, 0, 0, $1, 0, 0, 0, yylineno, 0); }
;
compound_stmt: TOKEN_LEFTCURLY stmt_list TOKEN_RIGHTCURLY
{ $$ = stmt_create(STMT_BLOCK, 0, 0, 0, 0, $2, 0, yylineno, 0); }
;
return_stmt: TOKEN_RETURN expr_opt TOKEN_SEMICOLON
{ $$ = stmt_create(STMT_RETURN, 0, 0, $2, 0, 0, 0, yylineno, 0); }
;
print_stmt: TOKEN_PRINT expr_list_opt TOKEN_SEMICOLON
{ $$ = stmt_create(STMT_PRINT, 0, 0, $2, 0, 0, 0, yylineno, 0); }
;
expr_list_opt: /* empty */
{ $$ = 0; }
| expr_list
{ $$ = $1; }
;
expr_list: expr
| expr_list TOKEN_COMMA expr
{ $$ = expr_create(EXPR_COMMA, $1, $3, yylineno); }
;
expr_opt: /* empty */
{ $$ = 0; }
| expr
{ $$ = $1; }
;
expr: assignment_expr
{ $$ = $1; }
;
assignment_expr: logical_or_expr
| unary_expr TOKEN_OP_ASSIGN assignment_expr
{ $$ = expr_create(EXPR_ASSIGN, $1, $3, yylineno); }
;
logical_or_expr: logical_and_expr
| logical_or_expr TOKEN_OP_OR logical_and_expr
{ $$ = expr_create(EXPR_OR, $1, $3, yylineno); }
;
logical_and_expr: relational_expr
| logical_and_expr TOKEN_OP_AND relational_expr
{ $$ = expr_create(EXPR_AND, $1, $3, yylineno); }
;
/* this is different from C. In C, relational ops have higher precedence than equality ops */
relational_expr: add_expr
| relational_expr TOKEN_OP_LT add_expr
{ $$ = expr_create(EXPR_LT, $1, $3, yylineno); }
| relational_expr TOKEN_OP_LE add_expr
{ $$ = expr_create(EXPR_LE, $1, $3, yylineno); }
| relational_expr TOKEN_OP_GT add_expr
{ $$ = expr_create(EXPR_GT, $1, $3, yylineno); }
| relational_expr TOKEN_OP_GE add_expr
{ $$ = expr_create(EXPR_GE, $1, $3, yylineno); }
| relational_expr TOKEN_OP_EQ add_expr
{ $$ = expr_create(EXPR_EQ, $1, $3, yylineno); }
| relational_expr TOKEN_OP_UNEQ add_expr
{ $$ = expr_create(EXPR_UNEQ, $1, $3, yylineno); }
;
add_expr: mul_expr
| add_expr TOKEN_OP_ADD mul_expr
{ $$ = expr_create(EXPR_ADD, $1, $3, yylineno); }
| add_expr TOKEN_OP_SUB mul_expr
{ $$ = expr_create(EXPR_SUB, $1, $3, yylineno); }
;
mul_expr: power_expr
| mul_expr TOKEN_OP_MUL power_expr
{ $$ = expr_create(EXPR_MUL, $1, $3, yylineno); }
| mul_expr TOKEN_OP_DIV power_expr
{ $$ = expr_create(EXPR_DIV, $1, $3, yylineno); }
| mul_expr TOKEN_OP_MOD power_expr
{ $$ = expr_create(EXPR_MOD, $1, $3, yylineno); }
;
power_expr: unary_expr
| power_expr TOKEN_OP_POWER unary_expr
{ $$ = expr_create(EXPR_POWER, $1, $3, yylineno); }
;
unary_expr: increment_expr
| unary_operator unary_expr
{ $$ = expr_create($1, 0, $2, yylineno); }
;
unary_operator: TOKEN_OP_SUB
{ $$ = EXPR_UNARY_NEG; }
| TOKEN_OP_NOT
{ $$ = EXPR_NOT; }
;
increment_expr: postfix_expr
| increment_expr TOKEN_OP_INCREMENT
{ $$ = expr_create(EXPR_INCREMENT, $1, 0, yylineno); }
| increment_expr TOKEN_OP_DECREMENT
{ $$ = expr_create(EXPR_DECREMENT, $1, 0, yylineno); }
;
postfix_expr: primary_expr
| postfix_expr TOKEN_OP_LEFTBRACKET expr TOKEN_OP_RIGHTBRACKET /* array subscript */
{ $$ = expr_create(EXPR_LEFTBRACKET, $1, $3, yylineno); }
| postfix_expr TOKEN_OP_LEFTPARENTHESS expr_list_opt TOKEN_OP_RIGHTPARENTHESS /* function call */
{ $$ = expr_create(EXPR_LEFTPARENTHESS, $1, $3, yylineno); }
;
primary_expr: constant
| TOKEN_IDENT
{ $$ = expr_create_name($1, yylineno); }
| TOKEN_OP_LEFTPARENTHESS expr TOKEN_OP_RIGHTPARENTHESS /* grouping */
{ $$ = $2; }
;
constant: TOKEN_INTEGER_LITERAL
{ $$ = expr_create_integer_literal($1, yylineno); }
| TOKEN_CHAR_LITERAL
{ $$ = expr_create_character_literal($1, yylineno); }
| TOKEN_STRING_LITERAL
{ $$ = expr_create_string_literal($1, yylineno); }
| TOKEN_TRUE
{ $$ = expr_create_boolean_literal(1, yylineno); }
| TOKEN_FALSE
{ $$ = expr_create_boolean_literal(0, yylineno); }
;
%%
/*
This function will be called by bison if the parse should
encounter an error. In principle, "str" will contain something
useful. In practice, it often does not.
*/
int yyerror( char *str )
{
printf("parse error: %s (line%d: %s)!\n",str, yylineno, yytext);
}