-
Notifications
You must be signed in to change notification settings - Fork 51
/
Copy pathLexer.cppo.ml
178 lines (161 loc) · 5.29 KB
/
Lexer.cppo.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
module Location = struct
include Location
let pp = print_loc
end
(* FIXME: while ppx_import is not compatible with jbuilder, simply copy and paste
token type here as a workaround. *)
(* type token = [@import Tokens.token] [@@deriving show] *)
#include "Tokens.ml"
[@@deriving show]
(* use custom lexbuffer to keep track of source location *)
module Sedlexing = LexBuffer
open LexBuffer
(** Signals a lexing error at the provided source location. *)
exception LexError of (Lexing.position * string)
(** Signals a parsing error at the provided token and its start and end locations. *)
exception ParseError of (token * Lexing.position * Lexing.position)
(** Register exceptions for pretty printing *)
let _ =
Location.register_error_of_exn (function
| LexError (pos, msg) ->
let loc = Location.{ loc_start = pos; loc_end = pos; loc_ghost = false} in
let main = Location.mkloc (fun fmt -> Format.fprintf fmt "%s" msg) loc in
let err = Location.{ kind = Report_error; main = main; sub=[] } in
Some err
| ParseError (token, loc_start, loc_end) ->
let loc = Location.{ loc_start; loc_end; loc_ghost = false} in
let msg =
show_token token
|> Printf.sprintf "parse error while reading token '%s'" in
let main = Location.mkloc (fun fmt -> Format.fprintf fmt "%s" msg) loc in
let err = Location.{ kind = Report_error; main = main; sub=[] } in
Some err
| _ -> None)
let failwith buf s = raise (LexError (buf.pos, s))
let illegal buf c =
Char.escaped c
|> Printf.sprintf "unexpected character in NetKAT expression: '%s'"
|> failwith buf
(** regular expressions *)
let letter = [%sedlex.regexp? 'A'..'Z' | 'a'..'z']
let digit = [%sedlex.regexp? '0'..'9']
let id_init = [%sedlex.regexp? letter | '_']
let id_cont = [%sedlex.regexp? id_init | Chars ".\'" | digit ]
let id = [%sedlex.regexp? id_init, Star id_cont ]
let hex = [%sedlex.regexp? digit | 'a'..'f' | 'A'..'F' ]
let hexnum = [%sedlex.regexp? '0', 'x', Plus hex ]
let decnum = [%sedlex.regexp? Plus digit]
let decbyte = [%sedlex.regexp? (digit,digit,digit) | (digit,digit) | digit ]
let hexbyte = [%sedlex.regexp? hex,hex ]
let blank = [%sedlex.regexp? ' ' | '\t' ]
let newline = [%sedlex.regexp? '\r' | '\n' | "\r\n" ]
(** swallows whitespace and comments *)
let rec garbage buf =
match%sedlex buf with
| newline -> garbage buf
| Plus blank -> garbage buf
| "(*" -> comment 1 buf
| _ -> ()
(* allow nested comments, like OCaml *)
and comment depth buf =
if depth = 0 then garbage buf else
match%sedlex buf with
| eof -> failwith buf "Unterminated comment at EOF"
| "(*" -> comment (depth + 1) buf
| "*)" -> comment (depth - 1) buf
| any -> comment depth buf
| _ -> assert false
(** returns the next token *)
let token ~ppx ~loc_start buf =
garbage buf;
match%sedlex buf with
| eof -> EOF
(* values *)
| decbyte,'.',decbyte,'.',decbyte,'.',decbyte ->
IP4ADDR (ascii buf)
| hexbyte,':',hexbyte,':',hexbyte,':',hexbyte,':',hexbyte,':',hexbyte ->
MAC (ascii buf)
| (hexnum | decnum) -> INT (ascii buf)
| (hexnum | decnum), 'l' -> INT (ascii buf)
| (hexnum | decnum), 'L' -> INT (ascii buf)
| "pipe" -> PIPE
| "query" -> QUERY
| '"', Star (Compl '"'), '"' -> STRING (ascii ~skip:1 ~drop:1 buf)
| "dup" -> DUP
(* predicates *)
| "true" -> TRUE
| "false" -> FALSE
| "and" -> AND
| "or" -> OR
| "not" -> NOT
| '=' -> EQUALS
(* policies *)
| "id" -> ID
| "drop" -> DROP
| "filter" -> FILTER
| ":=" -> ASSIGN
| ';' -> SEMICOLON
| '+' -> PLUS
| '*' -> STAR
| "=>" -> LINK
| "=>>" -> VLINK
| "@" -> AT
| '/' -> SLASH
(* fields *)
| "switch" -> SWITCH
| "port" -> PORT
| "vswitch" -> VSWITCH
| "vport" -> VPORT
| "vfabric" -> VFABRIC
| "ethSrc" -> ETHSRC
| "ethDst" -> ETHDST
| "vlanId" -> VLAN
| "vlanPcp" -> VLANPCP
| "ethTyp" -> ETHTYPE
| "ipProto" -> IPPROTO
| "ip4Src" -> IP4SRC
| "ip4Dst" -> IP4DST
| "tcpSrcPort" -> TCPSRCPORT
| "tcpDstPort" -> TCPDSTPORT
(* portless *)
| "from" -> FROM
| "loc" -> ABSTRACTLOC
(* syntax sugar *)
| "if" -> IF
| "then" -> THEN
| "else" -> ELSE
| "while" -> WHILE
| "do" -> DO
(* parenths *)
| '(' -> LPAR
| ')' -> RPAR
| "begin" -> BEGIN
| "end" -> END
(* meta fields *)
| "let" -> LET
| "var" -> VAR
| "in" -> IN
| '`', id -> METAID (ascii buf ~skip:1)
| _ -> illegal buf (next buf |> Base.Option.value_exn |> Uchar.to_char)
(** wrapper around `token` that records start and end locations *)
let loc_token ~ppx buf =
let () = garbage buf in (* dispose of garbage before recording start location *)
let loc_start = next_loc buf in
let t = token ~ppx ~loc_start buf in
let loc_end = next_loc buf in
(t, loc_start, loc_end)
(** menhir interface *)
type ('token, 'a) parser = ('token, 'a) MenhirLib.Convert.traditional
let parse ?(ppx=false) buf p =
let last_token = ref Lexing.(EOF, dummy_pos, dummy_pos) in
let next_token () = last_token := loc_token ~ppx buf; !last_token in
try MenhirLib.Convert.Simplified.traditional2revised p next_token with
| e ->
begin match e with
| LexError _ | Syntaxerr.Error _ -> raise e
| _ -> raise (ParseError (!last_token))
end
let parse_string ?ppx ?pos s p =
parse ?ppx (LexBuffer.of_ascii_string ?pos s) p
let parse_file ?ppx ~file p =
parse ?ppx (LexBuffer.of_ascii_file file) p