Skip to content

Commit

Permalink
Initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
pippijn committed May 8, 2012
1 parent e308d87 commit aee3b65
Show file tree
Hide file tree
Showing 24 changed files with 992 additions and 1 deletion.
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
merr
====

Error message generator for state/token pairs.
Error message generator for state/token pairs.

Based on ideas from Clinton Jeffery's "merr" tool
at http://unicon.sourceforge.net/merr/.
3 changes: 3 additions & 0 deletions _tags
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
"automerr": include
"common": include
"merr": include
2 changes: 2 additions & 0 deletions automerr/_tags
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
"a_ast.ml": syntax_camlp4o, pkg_sexplib.syntax
"automerr.ml": pkg_sexplib
33 changes: 33 additions & 0 deletions automerr/a_ast.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
(* AST for parsed LR-automaton listings (built by A_parser).
   Each [with sexp] clause uses the sexplib/type_conv camlp4 syntax
   extension (see automerr/_tags: syntax_camlp4o, pkg_sexplib.syntax)
   to derive s-expression converters for the type. *)
open Sexplib.Conv

(* Numeric identifier of an automaton state. *)
type statenum = int with sexp
(* Nonterminal symbol name (lowercase identifier in the listing). *)
type nonterminal = string with sexp
(* Terminal symbol name (all-caps identifier in the listing). *)
type terminal = string with sexp

(* One element on the right-hand side of a production. *)
type right_side =
| Nonterminal of nonterminal
| Terminal of terminal
| CurrentPosition  (* the "." item-position marker (TK_PERIOD) *)
| EndOfInput       (* the "#" end-of-input marker (TK_HASH) *)
| Bracketed of right_side list  (* a "[...]" group *)
with sexp

(* A grammar production: left-hand nonterminal and its rhs symbols. *)
type production =
| Production of nonterminal * right_side list
with sexp

(* Parser action performed when leaving a state. *)
type action =
| Shift of statenum
| Reduce of production
| Accept of nonterminal
with sexp

(* A transition: on this symbol, perform this action. *)
type jump =
| Jump of right_side * action
with sexp

(* One automaton state: its number, productions, and transitions. *)
type state =
| State of statenum * production list * jump list
with sexp

(* A whole listing is a sequence of states. *)
type states = state list with sexp
36 changes: 36 additions & 0 deletions automerr/a_errors.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
(* Table generated by automerr's [codegen]: maps a parser state number to
   the display strings (already quoted) of the terminals on which that
   state can shift.  States that cannot shift yield the empty list. *)
let expected = function
  | 0 | 48 | 62 -> ["\"state\"";]
  | 1 | 49 -> ["\"integer\"";]
  | 2 -> ["\":\"";]
  | 3 | 13 | 41 | 50 | 56 -> ["\"new line\"";]
  | 4 | 15 | 53 | 55 ->
      ["\"accept\"";"\"on\"";"\"production\"";"\"reduce\"";"\"shift\"";
       "\"state\"";"\"to\"";"\"nonterminal\"";]
  | 16 -> ["\"(\"";]
  | 17 | 22 ->
      ["\"accept\"";"\"on\"";"\"production\"";"\"reduce\"";"\"shift\"";
       "\"state\"";"\"to\"";"\"nonterminal\"";"\"terminal\"";]
  | 20 -> ["\",\"";"\")\"";]
  | 25 -> ["\"'\"";]
  | 28 -> ["\"->\"";]
  | 29 | 32 | 36 ->
      ["\"accept\"";"\"on\"";"\"production\"";"\"reduce\"";"\"shift\"";
       "\"state\"";"\"to\"";"\"#\"";"\"[\"";"\"nonterminal\"";"\".\"";
       "\"terminal\"";]
  | 34 -> ["\"]\"";]
  | 43 | 60 -> ["\"--\"";]
  | 44 -> ["\"on\"";]
  | 45 ->
      ["\"accept\"";"\"on\"";"\"production\"";"\"reduce\"";"\"shift\"";
       "\"state\"";"\"to\"";"\"#\"";"\"nonterminal\"";"\".\"";
       "\"terminal\"";]
  | 46 -> ["\"accept\"";"\"reduce\"";"\"shift\"";]
  | 47 -> ["\"to\"";]
  | 52 -> ["\"production\"";]
  | 65 -> ["\"end of input\"";]
  | _ -> []
49 changes: 49 additions & 0 deletions automerr/a_lexer.mll
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
(* Lexer for LR-automaton listing files, feeding A_parser. *)
{
open A_parser
}


(* Character classes. *)
let d = ['0'-'9']

let lcase = ['a'-'z']
let ucase = ['A'-'Z']
let idchar = (lcase | ucase | d | '_')

(* Nonterminal names start lowercase; terminal names are all-caps. *)
let nterm = lcase idchar*
let term = ucase (ucase | d | '_')*

rule token = parse
(* Keywords; "On" and "State" also occur capitalized in listings. *)
| "accept" { KW_ACCEPT }
| "on"
| "On" { KW_ON }
| "production" { KW_PRODUCTION }
| "reduce" { KW_REDUCE }
| "shift" { KW_SHIFT }
| "state"
| "State" { KW_STATE }
| "Conflict" { KW_CONFLICT }
| "to" { KW_TO }

(* Punctuation tokens. *)
| "**" { TK_STARSTAR }
| "," { TK_COMMA }
| "(" { TK_LBRACK }
| ")" { TK_RBRACK }
| ":" { TK_COLON }
| "'" { TK_SQUOT }
| "." { TK_PERIOD }
| "[" { TK_LSQBRACK }
| "]" { TK_RSQBRACK }
| "#" { TK_HASH }
| "--" { TK_MINMIN }
| "->" { TK_ARROW }

(* Identifiers and numbers. *)
| nterm as id { TK_NTERM id }
| term as id { TK_TERM id }
| d+ as num { TK_INTEGER (int_of_string num) }

(* Newlines are significant tokens; spaces are skipped.
   NOTE(review): tabs are NOT skipped and would hit the failure case
   below — confirm listings never contain tab characters. *)
| '\n' { TK_NEWLINE }
| ' ' { token lexbuf }

| _ as c { failwith ("invalid character: " ^ Char.escaped c) }

| eof { EOF }
136 changes: 136 additions & 0 deletions automerr/a_parser.mly
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
/* Menhir grammar for LR-automaton listing files.
   Entry point [parse] builds an A_ast.states value (see a_ast.ml). */
%{
open A_ast
%}

%token EOF

%token TK_NEWLINE
%token TK_COLON
%token TK_SQUOT
%token TK_PERIOD
%token TK_ARROW
%token TK_LSQBRACK
%token TK_RSQBRACK
%token TK_HASH
%token TK_MINMIN
%token TK_LBRACK
%token TK_RBRACK
%token TK_COMMA
%token TK_STARSTAR

%token KW_ACCEPT
%token KW_ON
%token KW_PRODUCTION
%token KW_REDUCE
%token KW_SHIFT
%token KW_STATE
%token KW_CONFLICT
%token KW_TO

%token<string> TK_NTERM
%token<string> TK_TERM
%token<int> TK_INTEGER


%start parse
%type<A_ast.states> parse

%%

/* A listing is one or more state blocks. */
parse
: state+ EOF
{ $1 }


/* "state N:" header, then the state's description, blank-line terminated. */
state
: KW_STATE TK_INTEGER TK_COLON TK_NEWLINE state_description TK_NEWLINE
{ State ($2, fst $5, snd $5) }


/* Productions followed by jump lines.  A trailing conflict report is
   recognized but deliberately discarded: only (productions, jumps) is
   returned. */
state_description
: production+ jump_description+ conflict?
{ $1, $2 }


/* One item line: "lhs -> rhs". */
production
: left_hand_side TK_ARROW right_hand_side TK_NEWLINE
{ Production ($1, $3) }


/* An optional prime marks the augmented start symbol (e.g. "input'"). */
left_hand_side
: nonterminal
{ $1 }
| nonterminal TK_SQUOT
{ $1 ^ "'" }


/* A nonterminal, possibly parameterized: "name" or "name(arg,...)".
   The textual form is reconstructed as a flat string. */
nonterminal
: nonterminal_word
{ $1 }
| nonterminal_word TK_LBRACK arguments TK_RBRACK
{ $1 ^ "(" ^ $3 ^ ")" }


/* Comma-separated parameter list, rebuilt as a string. */
arguments
: argument
{ $1 }
| arguments TK_COMMA argument
{ $1 ^ "," ^ $3 }


argument
: nonterminal { $1 }
| TK_TERM { $1 }


/* The (possibly empty) right-hand side of a production. */
right_hand_side
: rhs_part*
{ $1 }


rhs_part
: input
{ $1 }
| TK_LSQBRACK right_hand_side TK_RSQBRACK
{ Bracketed ($2) }


/* Words that are keywords to the lexer but may also appear as
   nonterminal names in the grammar being described.
   NOTE(review): KW_ON is rendered as "On" while all the others are
   rendered lowercase — confirm this asymmetry is intended. */
nonterminal_word
: TK_NTERM { $1 }
| KW_ACCEPT { "accept" }
| KW_ON { "On" }
| KW_PRODUCTION { "production" }
| KW_REDUCE { "reduce" }
| KW_SHIFT { "shift" }
| KW_STATE { "state" }
| KW_TO { "to" }


/* A single symbol: nonterminal, terminal, "." position, or "#" EOF. */
input
: nonterminal
{ Nonterminal $1 }
| TK_TERM
{ Terminal $1 }
| TK_PERIOD
{ CurrentPosition }
| TK_HASH
{ EndOfInput }


/* "-- on SYMBOL action" transition line. */
jump_description
: TK_MINMIN KW_ON input action
{ Jump ($3, $4) }


/* "** conflict on T..." report; parsed only to be skipped (dummy value). */
conflict
: TK_STARSTAR KW_CONFLICT KW_ON TK_TERM+ TK_NEWLINE
{ 0 }


action
: KW_SHIFT KW_TO KW_STATE TK_INTEGER TK_NEWLINE
{ Shift ($4) }
| KW_REDUCE KW_PRODUCTION production
{ Reduce ($3) }
| KW_ACCEPT nonterminal TK_NEWLINE
{ Accept ($2) }
96 changes: 96 additions & 0 deletions automerr/automerr.ml
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
open A_ast
open Sexplib.Sexp

let bprintf = Printf.bprintf

(* Emit into [out] the source of an [expected : int -> string list]
   function (cf. a_errors.ml): for each state, the display strings of the
   terminals on which that state can shift.  [strings] maps token names to
   their display spelling (from %token declarations parsed by
   [parse_strings]); tokens without an entry fall back to their own name. *)
let codegen out strings states =
  (* Loop-invariant: hoisted out of the per-state closure. *)
  let lookup key =
    try Hashtbl.find strings key
    with Not_found -> key
  in
  bprintf out "let expected state =\n";
  bprintf out " match state with\n";
  List.iter (fun (State (state, _productions, jumps)) ->
    (* Terminals this state shifts on; fold_left reverses the order
       relative to [jumps], matching the original output. *)
    let terms =
      List.fold_left (fun terms jump ->
        match jump with
        | Jump ((Terminal term), (Shift _)) ->
            term :: terms
        | _ -> terms
      ) [] jumps
    in

    match terms with
    | [] ->
        (* States with no shift transitions produce no match arm. *)
        ()
    | _ ->
        bprintf out " | %d -> [" state;
        (* Single pass instead of two intermediate List.map lists. *)
        List.iter
          (fun term -> bprintf out "\"%s\";" (String.escaped (lookup term)))
          terms;
        bprintf out "]\n"
  ) states;
  bprintf out " | _ -> []\n"


let open_file file = if file = "-" then stdin else open_in file


(* Read %token declarations from [strings] (a .mly file, or stdin for "-")
   and build a hashtable from token name to its quoted display string.
   T_lexer signals the "%%" separator by raising [T_lexer.Eof]. *)
let parse_strings strings =
  let lexbuf = Lexing.from_channel (open_file strings) in
  let table = Hashtbl.create 10 in
  let rec fill () =
    let (_, term, str) = T_lexer.token lexbuf in
    Hashtbl.add table term str;
    fill ()
  in
  (try fill () with T_lexer.Eof -> ());
  table

(* Tracing wrapper around a lexer function: reads one token via [token],
   prints its constructor name to stdout, and returns it unchanged. *)
let debug token lexbuf =
  let open A_parser in
  let tok = token lexbuf in
  let repr =
    match tok with
    | EOF -> "EOF"

    | TK_NEWLINE -> "TK_NEWLINE"
    | TK_COLON -> "TK_COLON"
    | TK_SQUOT -> "TK_SQUOT"
    | TK_PERIOD -> "TK_PERIOD"
    | TK_ARROW -> "TK_ARROW"
    | TK_LSQBRACK -> "TK_LSQBRACK"
    | TK_RSQBRACK -> "TK_RSQBRACK"
    | TK_HASH -> "TK_HASH"
    | TK_MINMIN -> "TK_MINMIN"
    | TK_LBRACK -> "TK_LBRACK"
    | TK_RBRACK -> "TK_RBRACK"
    | TK_COMMA -> "TK_COMMA"
    | TK_STARSTAR -> "TK_STARSTAR"

    | KW_ACCEPT -> "KW_ACCEPT"
    | KW_ON -> "KW_ON"
    | KW_PRODUCTION -> "KW_PRODUCTION"
    | KW_REDUCE -> "KW_REDUCE"
    | KW_SHIFT -> "KW_SHIFT"
    | KW_STATE -> "KW_STATE"
    | KW_CONFLICT -> "KW_CONFLICT"
    | KW_TO -> "KW_TO"

    | TK_NTERM s -> "TK_NTERM " ^ s
    | TK_TERM s -> "TK_TERM " ^ s
    | TK_INTEGER d -> "TK_INTEGER " ^ string_of_int d
  in
  print_endline repr;
  tok

(* Parse an automaton listing from [states] ("-" for stdin) into an
   A_ast.states value.  On a parse error (A_parser.StateError), print the
   token descriptions acceptable in the failing state — via the generated
   A_errors.expected table — and raise [Exit]. *)
let parse_states states =
  let lexbuf = Lexing.from_channel (open_file states) in
  try
    A_parser.parse A_lexer.token lexbuf
  with A_parser.StateError (_token, state) ->
    let msg = String.concat ", " (A_errors.expected state) in
    Printf.eprintf "expected one of: %s\n" msg;
    raise Exit
25 changes: 25 additions & 0 deletions automerr/t_lexer.mll
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
(* Lexer over a parser's .mly prologue: extracts %token declarations as
   (optional <type> annotation, token name, quoted display string)
   triples.  [Eof] is raised at the "%%" rule separator. *)
{
exception Eof
}


let d = ['0'-'9']
let ucase = ['A'-'Z']

(* Token names are all-caps identifiers. *)
let term = ucase (ucase | d | '_')*

(* A "<...>" type annotation, as in: %token<string> NAME *)
let toktype = '<' [^'>']+ '>'

(* A double-quoted string literal, allowing backslash escapes. *)
let dstring = '"' ('\\' _ | [^ '\\' '"' ])* '"'

let ws = [' ' '\t']

rule token = parse
(* %token<ty> NAME "display string" — [ty] is optional.
   NOTE(review): a %token line WITHOUT a trailing display string does not
   match this rule and falls through to the failwith case below — confirm
   the input .mly always carries display strings before "%%". *)
| "%token" (toktype as ty)? ws+ (term as term) ws+ (dstring as str)
{ (ty, term, str) }

(* Grammar rules follow; nothing more to extract. *)
| "%%" { raise Eof }

| '\n' { token lexbuf }

| _ as c { failwith (Char.escaped c) }
Loading

0 comments on commit aee3b65

Please sign in to comment.