-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
24 changed files
with
992 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,7 @@ | ||
merr | ||
==== | ||
|
||
Error message generator for state/token pairs. | ||
Error message generator for state/token pairs. | ||
|
||
Based on ideas from Clinton Jeffery's "merr" tool | ||
at http://unicon.sourceforge.net/merr/. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
"automerr": include | ||
"common": include | ||
"merr": include |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
"a_ast.ml": syntax_camlp4o, pkg_sexplib.syntax | ||
"automerr.ml": pkg_sexplib |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
open Sexplib.Conv | ||
|
||
type statenum = int with sexp | ||
type nonterminal = string with sexp | ||
type terminal = string with sexp | ||
|
||
type right_side = | ||
| Nonterminal of nonterminal | ||
| Terminal of terminal | ||
| CurrentPosition | ||
| EndOfInput | ||
| Bracketed of right_side list | ||
with sexp | ||
|
||
type production = | ||
| Production of nonterminal * right_side list | ||
with sexp | ||
|
||
type action = | ||
| Shift of statenum | ||
| Reduce of production | ||
| Accept of nonterminal | ||
with sexp | ||
|
||
type jump = | ||
| Jump of right_side * action | ||
with sexp | ||
|
||
type state = | ||
| State of statenum * production list * jump list | ||
with sexp | ||
|
||
type states = state list with sexp |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
let expected state = | ||
match state with | ||
| 0 -> ["\"state\"";] | ||
| 1 -> ["\"integer\"";] | ||
| 2 -> ["\":\"";] | ||
| 3 -> ["\"new line\"";] | ||
| 4 -> ["\"accept\"";"\"on\"";"\"production\"";"\"reduce\"";"\"shift\"";"\"state\"";"\"to\"";"\"nonterminal\"";] | ||
| 13 -> ["\"new line\"";] | ||
| 15 -> ["\"accept\"";"\"on\"";"\"production\"";"\"reduce\"";"\"shift\"";"\"state\"";"\"to\"";"\"nonterminal\"";] | ||
| 16 -> ["\"(\"";] | ||
| 17 -> ["\"accept\"";"\"on\"";"\"production\"";"\"reduce\"";"\"shift\"";"\"state\"";"\"to\"";"\"nonterminal\"";"\"terminal\"";] | ||
| 20 -> ["\",\"";"\")\"";] | ||
| 22 -> ["\"accept\"";"\"on\"";"\"production\"";"\"reduce\"";"\"shift\"";"\"state\"";"\"to\"";"\"nonterminal\"";"\"terminal\"";] | ||
| 25 -> ["\"'\"";] | ||
| 28 -> ["\"->\"";] | ||
| 29 -> ["\"accept\"";"\"on\"";"\"production\"";"\"reduce\"";"\"shift\"";"\"state\"";"\"to\"";"\"#\"";"\"[\"";"\"nonterminal\"";"\".\"";"\"terminal\"";] | ||
| 32 -> ["\"accept\"";"\"on\"";"\"production\"";"\"reduce\"";"\"shift\"";"\"state\"";"\"to\"";"\"#\"";"\"[\"";"\"nonterminal\"";"\".\"";"\"terminal\"";] | ||
| 34 -> ["\"]\"";] | ||
| 36 -> ["\"accept\"";"\"on\"";"\"production\"";"\"reduce\"";"\"shift\"";"\"state\"";"\"to\"";"\"#\"";"\"[\"";"\"nonterminal\"";"\".\"";"\"terminal\"";] | ||
| 41 -> ["\"new line\"";] | ||
| 43 -> ["\"--\"";] | ||
| 44 -> ["\"on\"";] | ||
| 45 -> ["\"accept\"";"\"on\"";"\"production\"";"\"reduce\"";"\"shift\"";"\"state\"";"\"to\"";"\"#\"";"\"nonterminal\"";"\".\"";"\"terminal\"";] | ||
| 46 -> ["\"accept\"";"\"reduce\"";"\"shift\"";] | ||
| 47 -> ["\"to\"";] | ||
| 48 -> ["\"state\"";] | ||
| 49 -> ["\"integer\"";] | ||
| 50 -> ["\"new line\"";] | ||
| 52 -> ["\"production\"";] | ||
| 53 -> ["\"accept\"";"\"on\"";"\"production\"";"\"reduce\"";"\"shift\"";"\"state\"";"\"to\"";"\"nonterminal\"";] | ||
| 55 -> ["\"accept\"";"\"on\"";"\"production\"";"\"reduce\"";"\"shift\"";"\"state\"";"\"to\"";"\"nonterminal\"";] | ||
| 56 -> ["\"new line\"";] | ||
| 60 -> ["\"--\"";] | ||
| 62 -> ["\"state\"";] | ||
| 65 -> ["\"end of input\"";] | ||
| _ -> [] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
{ | ||
open A_parser | ||
} | ||
|
||
|
||
let d = ['0'-'9'] | ||
|
||
let lcase = ['a'-'z'] | ||
let ucase = ['A'-'Z'] | ||
let idchar = (lcase | ucase | d | '_') | ||
|
||
let nterm = lcase idchar* | ||
let term = ucase (ucase | d | '_')* | ||
|
||
rule token = parse | ||
| "accept" { KW_ACCEPT } | ||
| "on" | ||
| "On" { KW_ON } | ||
| "production" { KW_PRODUCTION } | ||
| "reduce" { KW_REDUCE } | ||
| "shift" { KW_SHIFT } | ||
| "state" | ||
| "State" { KW_STATE } | ||
| "Conflict" { KW_CONFLICT } | ||
| "to" { KW_TO } | ||
|
||
| "**" { TK_STARSTAR } | ||
| "," { TK_COMMA } | ||
| "(" { TK_LBRACK } | ||
| ")" { TK_RBRACK } | ||
| ":" { TK_COLON } | ||
| "'" { TK_SQUOT } | ||
| "." { TK_PERIOD } | ||
| "[" { TK_LSQBRACK } | ||
| "]" { TK_RSQBRACK } | ||
| "#" { TK_HASH } | ||
| "--" { TK_MINMIN } | ||
| "->" { TK_ARROW } | ||
|
||
| nterm as id { TK_NTERM id } | ||
| term as id { TK_TERM id } | ||
| d+ as num { TK_INTEGER (int_of_string num) } | ||
|
||
| '\n' { TK_NEWLINE } | ||
| ' ' { token lexbuf } | ||
|
||
| _ as c { failwith ("invalid character: " ^ Char.escaped c) } | ||
|
||
| eof { EOF } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
%{ | ||
open A_ast | ||
%} | ||
|
||
%token EOF | ||
|
||
%token TK_NEWLINE | ||
%token TK_COLON | ||
%token TK_SQUOT | ||
%token TK_PERIOD | ||
%token TK_ARROW | ||
%token TK_LSQBRACK | ||
%token TK_RSQBRACK | ||
%token TK_HASH | ||
%token TK_MINMIN | ||
%token TK_LBRACK | ||
%token TK_RBRACK | ||
%token TK_COMMA | ||
%token TK_STARSTAR | ||
|
||
%token KW_ACCEPT | ||
%token KW_ON | ||
%token KW_PRODUCTION | ||
%token KW_REDUCE | ||
%token KW_SHIFT | ||
%token KW_STATE | ||
%token KW_CONFLICT | ||
%token KW_TO | ||
|
||
%token<string> TK_NTERM | ||
%token<string> TK_TERM | ||
%token<int> TK_INTEGER | ||
|
||
|
||
%start parse | ||
%type<A_ast.states> parse | ||
|
||
%% | ||
|
||
parse | ||
: state+ EOF | ||
{ $1 } | ||
|
||
|
||
state | ||
: KW_STATE TK_INTEGER TK_COLON TK_NEWLINE state_description TK_NEWLINE | ||
{ State ($2, fst $5, snd $5) } | ||
|
||
|
||
state_description | ||
: production+ jump_description+ conflict? | ||
{ $1, $2 } | ||
|
||
|
||
production | ||
: left_hand_side TK_ARROW right_hand_side TK_NEWLINE | ||
{ Production ($1, $3) } | ||
|
||
|
||
left_hand_side | ||
: nonterminal | ||
{ $1 } | ||
| nonterminal TK_SQUOT | ||
{ $1 ^ "'" } | ||
|
||
|
||
nonterminal | ||
: nonterminal_word | ||
{ $1 } | ||
| nonterminal_word TK_LBRACK arguments TK_RBRACK | ||
{ $1 ^ "(" ^ $3 ^ ")" } | ||
|
||
|
||
arguments | ||
: argument | ||
{ $1 } | ||
| arguments TK_COMMA argument | ||
{ $1 ^ "," ^ $3 } | ||
|
||
|
||
argument | ||
: nonterminal { $1 } | ||
| TK_TERM { $1 } | ||
|
||
|
||
right_hand_side | ||
: rhs_part* | ||
{ $1 } | ||
|
||
|
||
rhs_part | ||
: input | ||
{ $1 } | ||
| TK_LSQBRACK right_hand_side TK_RSQBRACK | ||
{ Bracketed ($2) } | ||
|
||
|
||
nonterminal_word | ||
: TK_NTERM { $1 } | ||
| KW_ACCEPT { "accept" } | ||
| KW_ON { "On" } | ||
| KW_PRODUCTION { "production" } | ||
| KW_REDUCE { "reduce" } | ||
| KW_SHIFT { "shift" } | ||
| KW_STATE { "state" } | ||
| KW_TO { "to" } | ||
|
||
|
||
input | ||
: nonterminal | ||
{ Nonterminal $1 } | ||
| TK_TERM | ||
{ Terminal $1 } | ||
| TK_PERIOD | ||
{ CurrentPosition } | ||
| TK_HASH | ||
{ EndOfInput } | ||
|
||
|
||
jump_description | ||
: TK_MINMIN KW_ON input action | ||
{ Jump ($3, $4) } | ||
|
||
|
||
conflict | ||
: TK_STARSTAR KW_CONFLICT KW_ON TK_TERM+ TK_NEWLINE | ||
{ 0 } | ||
|
||
|
||
action | ||
: KW_SHIFT KW_TO KW_STATE TK_INTEGER TK_NEWLINE | ||
{ Shift ($4) } | ||
| KW_REDUCE KW_PRODUCTION production | ||
{ Reduce ($3) } | ||
| KW_ACCEPT nonterminal TK_NEWLINE | ||
{ Accept ($2) } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,96 @@ | ||
open A_ast | ||
open Sexplib.Sexp | ||
|
||
let bprintf = Printf.bprintf | ||
|
||
let codegen out strings states = | ||
bprintf out "let expected state =\n"; | ||
bprintf out " match state with\n"; | ||
List.iter (fun (State (state, productions, jumps)) -> | ||
let terms = | ||
List.fold_left (fun terms jump -> | ||
match jump with | ||
| Jump ((Terminal term), (Shift _)) -> | ||
term :: terms | ||
| _ -> terms | ||
) [] jumps | ||
in | ||
|
||
let lookup table key = | ||
try | ||
Hashtbl.find table key | ||
with Not_found -> | ||
key | ||
in | ||
|
||
match terms with | ||
| _::_ -> | ||
bprintf out " | %d -> [" state; | ||
List.iter (bprintf out "\"%s\";") (List.map String.escaped (List.map (lookup strings) terms)); | ||
bprintf out "]\n" | ||
| _ -> () | ||
) states; | ||
bprintf out " | _ -> []\n" | ||
|
||
|
||
let open_file file = if file = "-" then stdin else open_in file | ||
|
||
|
||
let parse_strings strings = | ||
let lexbuf = Lexing.from_channel (open_file strings) in | ||
let table = Hashtbl.create 10 in | ||
begin try | ||
while true do | ||
match T_lexer.token lexbuf with | ||
| (_, term, str) -> Hashtbl.add table term str | ||
done | ||
with T_lexer.Eof -> | ||
() | ||
end; | ||
table | ||
|
||
let debug token lexbuf = | ||
let tok = token lexbuf in | ||
let open A_parser in | ||
let str = | ||
match tok with | ||
| EOF -> "EOF" | ||
|
||
| TK_NEWLINE -> "TK_NEWLINE" | ||
| TK_COLON -> "TK_COLON" | ||
| TK_SQUOT -> "TK_SQUOT" | ||
| TK_PERIOD -> "TK_PERIOD" | ||
| TK_ARROW -> "TK_ARROW" | ||
| TK_LSQBRACK -> "TK_LSQBRACK" | ||
| TK_RSQBRACK -> "TK_RSQBRACK" | ||
| TK_HASH -> "TK_HASH" | ||
| TK_MINMIN -> "TK_MINMIN" | ||
| TK_LBRACK -> "TK_LBRACK" | ||
| TK_RBRACK -> "TK_RBRACK" | ||
| TK_COMMA -> "TK_COMMA" | ||
| TK_STARSTAR -> "TK_STARSTAR" | ||
|
||
| KW_ACCEPT -> "KW_ACCEPT" | ||
| KW_ON -> "KW_ON" | ||
| KW_PRODUCTION -> "KW_PRODUCTION" | ||
| KW_REDUCE -> "KW_REDUCE" | ||
| KW_SHIFT -> "KW_SHIFT" | ||
| KW_STATE -> "KW_STATE" | ||
| KW_CONFLICT -> "KW_CONFLICT" | ||
| KW_TO -> "KW_TO" | ||
|
||
| TK_NTERM s -> "TK_NTERM " ^ s | ||
| TK_TERM s -> "TK_TERM " ^ s | ||
| TK_INTEGER d -> "TK_INTEGER " ^ string_of_int d | ||
in | ||
Printf.printf "%s\n" str; | ||
tok | ||
|
||
let parse_states states = | ||
let lexbuf = Lexing.from_channel (open_file states) in | ||
try | ||
A_parser.parse A_lexer.token lexbuf | ||
with A_parser.StateError (token, state) -> | ||
let expected = A_errors.expected state in | ||
Printf.fprintf stderr "expected one of: %s\n" (String.concat ", " expected); | ||
raise Exit |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
{ | ||
exception Eof | ||
} | ||
|
||
|
||
let d = ['0'-'9'] | ||
let ucase = ['A'-'Z'] | ||
|
||
let term = ucase (ucase | d | '_')* | ||
|
||
let toktype = '<' [^'>']+ '>' | ||
|
||
let dstring = '"' ('\\' _ | [^ '\\' '"' ])* '"' | ||
|
||
let ws = [' ' '\t'] | ||
|
||
rule token = parse | ||
| "%token" (toktype as ty)? ws+ (term as term) ws+ (dstring as str) | ||
{ (ty, term, str) } | ||
|
||
| "%%" { raise Eof } | ||
|
||
| '\n' { token lexbuf } | ||
|
||
| _ as c { failwith (Char.escaped c) } |
Oops, something went wrong.