Initial commit

pippijn · May 8, 2012 · aee3b65 · aee3b65
1 parent e308d87
commit aee3b65
Show file tree

Hide file tree

Showing 24 changed files with 992 additions and 1 deletion.
diff --git a/README.md b/README.md
@@ -1,4 +1,7 @@
 merr
 ====
 
-Error message generator for state/token pairs.
+Error message generator for state/token pairs.
+
+Based on ideas from Clinton Jeffery's "merr" tool
+at http://unicon.sourceforge.net/merr/.
diff --git a/_tags b/_tags
@@ -0,0 +1,3 @@
+"automerr": include
+"common": include
+"merr": include
diff --git a/automerr/_tags b/automerr/_tags
@@ -0,0 +1,2 @@
+"a_ast.ml": syntax_camlp4o, pkg_sexplib.syntax
+"automerr.ml": pkg_sexplib
diff --git a/automerr/a_ast.ml b/automerr/a_ast.ml
@@ -0,0 +1,33 @@
+open Sexplib.Conv
+
+type statenum = int with sexp
+type nonterminal = string with sexp
+type terminal = string with sexp
+
+type right_side =
+  | Nonterminal of nonterminal
+  | Terminal of terminal
+  | CurrentPosition
+  | EndOfInput
+  | Bracketed of right_side list
+  with sexp
+
+type production =
+  | Production of nonterminal * right_side list
+  with sexp
+
+type action =
+  | Shift of statenum
+  | Reduce of production
+  | Accept of nonterminal
+  with sexp
+
+type jump =
+  | Jump of right_side * action
+  with sexp
+
+type state =
+  | State of statenum * production list * jump list
+  with sexp
+
+type states = state list with sexp
diff --git a/automerr/a_errors.ml b/automerr/a_errors.ml
@@ -0,0 +1,36 @@
+let expected state =
+  match state with
+  | 0 -> ["\"state\"";]
+  | 1 -> ["\"integer\"";]
+  | 2 -> ["\":\"";]
+  | 3 -> ["\"new line\"";]
+  | 4 -> ["\"accept\"";"\"on\"";"\"production\"";"\"reduce\"";"\"shift\"";"\"state\"";"\"to\"";"\"nonterminal\"";]
+  | 13 -> ["\"new line\"";]
+  | 15 -> ["\"accept\"";"\"on\"";"\"production\"";"\"reduce\"";"\"shift\"";"\"state\"";"\"to\"";"\"nonterminal\"";]
+  | 16 -> ["\"(\"";]
+  | 17 -> ["\"accept\"";"\"on\"";"\"production\"";"\"reduce\"";"\"shift\"";"\"state\"";"\"to\"";"\"nonterminal\"";"\"terminal\"";]
+  | 20 -> ["\",\"";"\")\"";]
+  | 22 -> ["\"accept\"";"\"on\"";"\"production\"";"\"reduce\"";"\"shift\"";"\"state\"";"\"to\"";"\"nonterminal\"";"\"terminal\"";]
+  | 25 -> ["\"'\"";]
+  | 28 -> ["\"->\"";]
+  | 29 -> ["\"accept\"";"\"on\"";"\"production\"";"\"reduce\"";"\"shift\"";"\"state\"";"\"to\"";"\"#\"";"\"[\"";"\"nonterminal\"";"\".\"";"\"terminal\"";]
+  | 32 -> ["\"accept\"";"\"on\"";"\"production\"";"\"reduce\"";"\"shift\"";"\"state\"";"\"to\"";"\"#\"";"\"[\"";"\"nonterminal\"";"\".\"";"\"terminal\"";]
+  | 34 -> ["\"]\"";]
+  | 36 -> ["\"accept\"";"\"on\"";"\"production\"";"\"reduce\"";"\"shift\"";"\"state\"";"\"to\"";"\"#\"";"\"[\"";"\"nonterminal\"";"\".\"";"\"terminal\"";]
+  | 41 -> ["\"new line\"";]
+  | 43 -> ["\"--\"";]
+  | 44 -> ["\"on\"";]
+  | 45 -> ["\"accept\"";"\"on\"";"\"production\"";"\"reduce\"";"\"shift\"";"\"state\"";"\"to\"";"\"#\"";"\"nonterminal\"";"\".\"";"\"terminal\"";]
+  | 46 -> ["\"accept\"";"\"reduce\"";"\"shift\"";]
+  | 47 -> ["\"to\"";]
+  | 48 -> ["\"state\"";]
+  | 49 -> ["\"integer\"";]
+  | 50 -> ["\"new line\"";]
+  | 52 -> ["\"production\"";]
+  | 53 -> ["\"accept\"";"\"on\"";"\"production\"";"\"reduce\"";"\"shift\"";"\"state\"";"\"to\"";"\"nonterminal\"";]
+  | 55 -> ["\"accept\"";"\"on\"";"\"production\"";"\"reduce\"";"\"shift\"";"\"state\"";"\"to\"";"\"nonterminal\"";]
+  | 56 -> ["\"new line\"";]
+  | 60 -> ["\"--\"";]
+  | 62 -> ["\"state\"";]
+  | 65 -> ["\"end of input\"";]
+  | _ -> []
diff --git a/automerr/a_lexer.mll b/automerr/a_lexer.mll
@@ -0,0 +1,49 @@
+{
+  open A_parser
+}
+
+
+let d = ['0'-'9']
+
+let lcase = ['a'-'z']
+let ucase = ['A'-'Z']
+let idchar = (lcase | ucase | d | '_')
+
+let nterm = lcase idchar*
+let term = ucase (ucase | d | '_')*
+
+rule token = parse
+  | "accept"			{ KW_ACCEPT }
+  | "on"
+  | "On"			{ KW_ON }
+  | "production"		{ KW_PRODUCTION }
+  | "reduce"			{ KW_REDUCE }
+  | "shift"			{ KW_SHIFT }
+  | "state"
+  | "State"			{ KW_STATE }
+  | "Conflict"			{ KW_CONFLICT }
+  | "to"			{ KW_TO }
+
+  | "**"			{ TK_STARSTAR }
+  | ","				{ TK_COMMA }
+  | "("				{ TK_LBRACK }
+  | ")"				{ TK_RBRACK }
+  | ":"				{ TK_COLON }
+  | "'"				{ TK_SQUOT }
+  | "."				{ TK_PERIOD }
+  | "["				{ TK_LSQBRACK }
+  | "]"				{ TK_RSQBRACK }
+  | "#"				{ TK_HASH }
+  | "--"			{ TK_MINMIN }
+  | "->"			{ TK_ARROW }
+
+  | nterm as id			{ TK_NTERM id }
+  | term as id			{ TK_TERM id }
+  | d+ as num			{ TK_INTEGER (int_of_string num) }
+
+  | '\n'			{ TK_NEWLINE }
+  | ' '				{ token lexbuf }
+
+  | _ as c			{ failwith ("invalid character: " ^ Char.escaped c) }
+
+  | eof				{ EOF }
diff --git a/automerr/a_parser.mly b/automerr/a_parser.mly
@@ -0,0 +1,136 @@
+%{
+  open A_ast
+%}
+
+%token EOF
+
+%token TK_NEWLINE
+%token TK_COLON
+%token TK_SQUOT
+%token TK_PERIOD
+%token TK_ARROW
+%token TK_LSQBRACK
+%token TK_RSQBRACK
+%token TK_HASH
+%token TK_MINMIN
+%token TK_LBRACK
+%token TK_RBRACK
+%token TK_COMMA
+%token TK_STARSTAR
+
+%token KW_ACCEPT
+%token KW_ON
+%token KW_PRODUCTION
+%token KW_REDUCE
+%token KW_SHIFT
+%token KW_STATE
+%token KW_CONFLICT
+%token KW_TO
+
+%token<string> TK_NTERM
+%token<string> TK_TERM
+%token<int> TK_INTEGER
+
+
+%start parse
+%type<A_ast.states> parse
+
+%%
+
+parse
+	: state+ EOF
+		{ $1 }
+
+
+state
+	: KW_STATE TK_INTEGER TK_COLON TK_NEWLINE state_description TK_NEWLINE
+		{ State ($2, fst $5, snd $5) }
+
+
+state_description
+	: production+ jump_description+ conflict?
+		{ $1, $2 }
+
+
+production
+	: left_hand_side TK_ARROW right_hand_side TK_NEWLINE
+		{ Production ($1, $3) }
+
+
+left_hand_side
+	: nonterminal
+		{ $1 }
+	| nonterminal TK_SQUOT
+		{ $1 ^ "'" }
+
+
+nonterminal
+	: nonterminal_word
+		{ $1 }
+        | nonterminal_word TK_LBRACK arguments TK_RBRACK
+		{ $1 ^ "(" ^ $3 ^ ")" }
+
+
+arguments
+	: argument
+		{ $1 }
+        | arguments TK_COMMA argument
+		{ $1 ^ "," ^ $3 }
+
+
+argument
+	: nonterminal	{ $1 }
+        | TK_TERM	{ $1 }
+
+
+right_hand_side
+	: rhs_part*
+		{ $1 }
+
+
+rhs_part
+	: input
+		{ $1 }
+        | TK_LSQBRACK right_hand_side TK_RSQBRACK
+		{ Bracketed ($2) }
+
+
+nonterminal_word
+	: TK_NTERM	{ $1 }
+	| KW_ACCEPT	{ "accept" }
+	| KW_ON		{ "On" }
+	| KW_PRODUCTION	{ "production" }
+	| KW_REDUCE	{ "reduce" }
+	| KW_SHIFT	{ "shift" }
+	| KW_STATE	{ "state" }
+	| KW_TO		{ "to" }
+
+
+input
+	: nonterminal
+		{ Nonterminal $1 }
+        | TK_TERM
+		{ Terminal $1 }
+        | TK_PERIOD
+		{ CurrentPosition }
+        | TK_HASH
+		{ EndOfInput }
+
+
+jump_description
+	: TK_MINMIN KW_ON input action
+		{ Jump ($3, $4) }
+
+
+conflict
+	: TK_STARSTAR KW_CONFLICT KW_ON TK_TERM+ TK_NEWLINE
+        	{ 0 }
+
+
+action
+	: KW_SHIFT KW_TO KW_STATE TK_INTEGER TK_NEWLINE
+		{ Shift ($4) }
+        | KW_REDUCE KW_PRODUCTION production
+		{ Reduce ($3) }
+        | KW_ACCEPT nonterminal TK_NEWLINE
+		{ Accept ($2) }
diff --git a/automerr/automerr.ml b/automerr/automerr.ml
@@ -0,0 +1,96 @@
+open A_ast
+open Sexplib.Sexp
+
+let bprintf = Printf.bprintf
+
+let codegen out strings states =
+  bprintf out "let expected state =\n";
+  bprintf out "  match state with\n";
+  List.iter (fun (State (state, productions, jumps)) ->
+    let terms =
+      List.fold_left (fun terms jump ->
+        match jump with
+        | Jump ((Terminal term), (Shift _)) ->
+            term :: terms
+        | _ -> terms
+      ) [] jumps
+    in
+
+    let lookup table key =
+      try
+        Hashtbl.find table key
+      with Not_found ->
+        key
+    in
+
+    match terms with
+    | _::_ ->
+        bprintf out "  | %d -> [" state;
+        List.iter (bprintf out "\"%s\";") (List.map String.escaped (List.map (lookup strings) terms));
+        bprintf out "]\n"
+    | _ -> ()
+  ) states;
+  bprintf out "  | _ -> []\n"
+
+
+let open_file file = if file = "-" then stdin else open_in file
+
+
+let parse_strings strings =
+  let lexbuf = Lexing.from_channel (open_file strings) in
+  let table = Hashtbl.create 10 in
+  begin try
+    while true do
+      match T_lexer.token lexbuf with
+      | (_, term, str) -> Hashtbl.add table term str
+    done
+  with T_lexer.Eof ->
+    ()
+  end;
+  table
+
+let debug token lexbuf =
+  let tok = token lexbuf in
+  let open A_parser in
+  let str =
+    match tok with
+    | EOF -> "EOF"
+
+    | TK_NEWLINE -> "TK_NEWLINE"
+    | TK_COLON -> "TK_COLON"
+    | TK_SQUOT -> "TK_SQUOT"
+    | TK_PERIOD -> "TK_PERIOD"
+    | TK_ARROW -> "TK_ARROW"
+    | TK_LSQBRACK -> "TK_LSQBRACK"
+    | TK_RSQBRACK -> "TK_RSQBRACK"
+    | TK_HASH -> "TK_HASH"
+    | TK_MINMIN -> "TK_MINMIN"
+    | TK_LBRACK -> "TK_LBRACK"
+    | TK_RBRACK -> "TK_RBRACK"
+    | TK_COMMA -> "TK_COMMA"
+    | TK_STARSTAR -> "TK_STARSTAR"
+
+    | KW_ACCEPT -> "KW_ACCEPT"
+    | KW_ON -> "KW_ON"
+    | KW_PRODUCTION -> "KW_PRODUCTION"
+    | KW_REDUCE -> "KW_REDUCE"
+    | KW_SHIFT -> "KW_SHIFT"
+    | KW_STATE -> "KW_STATE"
+    | KW_CONFLICT -> "KW_CONFLICT"
+    | KW_TO -> "KW_TO"
+
+    | TK_NTERM s -> "TK_NTERM " ^ s
+    | TK_TERM s -> "TK_TERM " ^ s
+    | TK_INTEGER d -> "TK_INTEGER " ^ string_of_int d
+  in
+  Printf.printf "%s\n" str;
+  tok
+
+let parse_states states =
+  let lexbuf = Lexing.from_channel (open_file states) in
+  try
+    A_parser.parse A_lexer.token lexbuf
+  with A_parser.StateError (token, state) ->
+    let expected = A_errors.expected state in
+    Printf.fprintf stderr "expected one of: %s\n" (String.concat ", " expected);
+    raise Exit
diff --git a/automerr/t_lexer.mll b/automerr/t_lexer.mll
@@ -0,0 +1,25 @@
+{
+  exception Eof
+}
+
+
+let d = ['0'-'9']
+let ucase = ['A'-'Z']
+
+let term = ucase (ucase | d | '_')*
+
+let toktype = '<' [^'>']+ '>'
+
+let dstring	= '"'  ('\\' _ | [^ '\\' '"' ])* '"'
+
+let ws = [' ' '\t']
+
+rule token = parse
+  | "%token" (toktype as ty)? ws+ (term as term) ws+ (dstring as str)
+  		{ (ty, term, str) }
+
+  | "%%"	{ raise Eof }
+
+  | '\n'	{ token lexbuf }
+
+  | _ as c	{ failwith (Char.escaped c) }
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		"a_ast.ml": syntax_camlp4o, pkg_sexplib.syntax
		"automerr.ml": pkg_sexplib