-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathtokenizer.h
95 lines (81 loc) · 2.22 KB
/
tokenizer.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
#ifndef TOKENIZER_H
#define TOKENIZER_H
/* Size, in bytes, of the token text buffer (struct tokenizer::buf). */
#define MAX_TOK_LEN 4096
/* Number of int slots in struct tokenizer_getc_buf::buf. */
#define MAX_UNGETC 8
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/types.h>
/*
 * Fixed-capacity buffer of character values, embedded in struct tokenizer
 * as its getc_buf member. Judging by the MAX_UNGETC name it backs an
 * ungetc-style pushback mechanism -- confirm against tokenizer.c.
 */
struct tokenizer_getc_buf {
	/* Storage slots; int rather than char, presumably so EOF can be held. */
	int buf[MAX_UNGETC];
	/* Bookkeeping counters; exact meaning is defined by tokenizer.c. */
	size_t cnt;
	size_t buffered;
};
/*
 * Kinds of comment delimiter ("marker") the tokenizer knows about.
 * struct tokenizer::marker holds MT_MAX + 1 strings, i.e. one slot per
 * enumerator below, so the values must stay dense and start at 0.
 */
enum markertype {
	MT_SINGLELINE_COMMENT_START,	/* 0 */
	MT_MULTILINE_COMMENT_START,	/* 1 */
	MT_MULTILINE_COMMENT_END,	/* 2 */
	/* Highest valid marker type; keep equal to the last real entry. */
	MT_MAX = MT_MULTILINE_COMMENT_END
};
/* Capacity of struct tokenizer::custom_tokens. */
#define MAX_CUSTOM_TOKENS 32
/*
 * Classification stored in struct token::type.
 * Built-in kinds occupy 1..14; values from TT_CUSTOM upwards are reserved
 * for user-defined token types.
 */
enum tokentype {
	TT_IDENTIFIER     = 1,
	TT_SQSTRING_LIT   = 2,
	TT_DQSTRING_LIT   = 3,
	TT_ELLIPSIS       = 4,
	TT_HEX_INT_LIT    = 5,
	TT_OCT_INT_LIT    = 6,
	TT_DEC_INT_LIT    = 7,
	TT_FLOAT_LIT      = 8,
	TT_SEP            = 9,

	/* errors and similar */
	TT_UNKNOWN        = 10,
	TT_OVERFLOW       = 11,
	TT_WIDECHAR_LIT   = 12,
	TT_WIDESTRING_LIT = 13,
	TT_EOF            = 14,

	/* start user defined tokentype values */
	TT_CUSTOM         = 1000
};
/*
 * Map a token type to a string. The exact text, and the result for values
 * outside enum tokentype, are determined by the implementation (not
 * visible from this header).
 */
const char *tokentype_to_str(enum tokentype tt);
/*
 * One token as reported through the `out` parameter of tokenizer_next()
 * and tokenizer_peek_token().
 */
struct token {
	enum tokentype type;	/* classification of the token */
	uint32_t line;		/* source position: line */
	uint32_t column;	/* source position: column */
	int value;		/* extra payload; meaning presumably depends on
				 * `type` -- confirm against tokenizer.c */
};
/*
 * Option bits. Each enumerator is a distinct single-bit mask, so they can
 * be OR-ed together; presumably this is what the `flags` arguments of
 * tokenizer_init() / tokenizer_set_flags() expect -- confirm.
 */
enum tokenizer_flags {
	TF_PARSE_STRINGS      = 0x1,
	TF_PARSE_WIDE_STRINGS = 0x2
};
/*
 * Complete tokenizer state. Callers allocate the object themselves and
 * pass its address to the tokenizer_*() functions.
 *
 * Per-field notes marked "presumably" are inferred from names only and
 * should be confirmed against tokenizer.c.
 */
struct tokenizer {
	FILE *input;			/* stream being tokenized */
	uint32_t line;			/* position tracking: line */
	uint32_t column;		/* position tracking: column */
	int flags;			/* presumably a mask of enum tokenizer_flags */
	int custom_count;		/* presumably the number of used custom_tokens slots */
	int peeking;			/* presumably non-zero while peek_token is valid */
	const char *custom_tokens[MAX_CUSTOM_TOKENS];
	char buf[MAX_TOK_LEN];		/* token text buffer */
	size_t bufsize;
	struct tokenizer_getc_buf getc_buf;
	const char *marker[MT_MAX + 1];	/* one slot per enum markertype value */
	const char *filename;
	struct token peek_token;
};
/*
 * Public tokenizer API.
 *
 * The implementations live in tokenizer.c and are not visible from this
 * header; the one-line notes below are derived from the names and
 * signatures only. Return-value conventions of the int-returning
 * functions are NOT documented here -- check tokenizer.c before relying
 * on them.
 */

/* Set up `t` to read from the stream `in`; `flags` is presumably a mask of
 * enum tokenizer_flags. */
void tokenizer_init(struct tokenizer *t, FILE *in, int flags);

/* Associate a file name with `t`. Whether the string is copied or merely
 * referenced is up to the implementation -- keep it alive to be safe. */
void tokenizer_set_filename(struct tokenizer *t, const char *filename);

/* Accessors for the tokenizer's flag word. */
void tokenizer_set_flags(struct tokenizer *t, int flags);
int tokenizer_get_flags(struct tokenizer *t);

/* Report a file offset for `t`'s input, in the manner of ftello().
 * off_t is not an ISO C type; it is provided by <sys/types.h>. */
off_t tokenizer_ftello(struct tokenizer *t);

/* Register the delimiter string for one comment marker type. */
void tokenizer_register_marker(struct tokenizer *t, enum markertype mt,
                               const char *marker);

/* Register a user-defined token string under the type id `tokentype`
 * (presumably a value >= TT_CUSTOM). struct tokenizer has room for
 * MAX_CUSTOM_TOKENS such strings. */
void tokenizer_register_custom_token(struct tokenizer *t, int tokentype,
                                     const char *str);

/* Fetch the next token into `*out`. */
int tokenizer_next(struct tokenizer *t, struct token *out);

/* Look at the upcoming token via `*out`; by its name, without consuming it. */
int tokenizer_peek_token(struct tokenizer *t, struct token *out);

/* Look at upcoming input; the int return presumably carries a character
 * value or EOF. */
int tokenizer_peek(struct tokenizer *t);

/* Skip input until `marker`. */
void tokenizer_skip_until(struct tokenizer *t, const char *marker);

/* Skip characters contained in `chars`; `*count` presumably receives the
 * number skipped. */
int tokenizer_skip_chars(struct tokenizer *t, const char *chars, int *count);

/* Read input until `marker`; `stop_at_nl` presumably also stops the read at
 * a newline when non-zero. */
int tokenizer_read_until(struct tokenizer *t, const char *marker,
                         int stop_at_nl);

/* Rewind the tokenizer's input. */
int tokenizer_rewind(struct tokenizer *t);
/*
 * When compiling with GCC-compatible compilers, silence -Wunknown-pragmas
 * so that the tool-specific pragma below does not produce a warning.
 * NOTE(review): there is no diagnostic push/pop around this, so the
 * setting is not restored afterwards and may also apply to code that
 * follows the #include of this header.
 */
#ifdef __GNUC__
#pragma GCC diagnostic ignored "-Wunknown-pragmas"
#endif
/*
 * Non-standard pragma naming the implementation file; presumably read by
 * the RcB2 build tool to record a dependency on tokenizer.c -- confirm.
 */
#pragma RcB2 DEP "tokenizer.c"
#endif