diff --git a/format.lua b/format.lua index f416a2ac..bb0a8e17 100644 --- a/format.lua +++ b/format.lua @@ -1,6 +1,6 @@ local lpeg = require "lpeg" -local P, C, Cg, Cc, Cmt, Ct, Cb, Cp, Cf, S, V, R = - lpeg.P, lpeg.C, lpeg.Cg, lpeg.Cc, lpeg.Cmt, lpeg.Ct, lpeg.Cb, lpeg.Cp, lpeg.Cf, lpeg.S, lpeg.V, lpeg.R +local P, C, Cg, Cc, Cmt, Ct, Cb, Cp, Cf, Cs, S, V, R = + lpeg.P, lpeg.C, lpeg.Cg, lpeg.Cc, lpeg.Cmt, lpeg.Ct, lpeg.Cb, lpeg.Cp, lpeg.Cf, lpeg.Cs, lpeg.S, lpeg.V, lpeg.R -- SLN -- expressions, atoms, lists @@ -8,6 +8,16 @@ local P, C, Cg, Cc, Cmt, Ct, Cb, Cp, Cf, S, V, R = -- a python SLN parser: https://github.com/salotz/python-sln/blob/master/src/sln/parser.py local anchor_mt = { + + __lt = function(fst, snd) + return snd.line > fst.line or (snd.line == fst.line and snd.char > fst.char) + end, + __le = function(fst, snd) + return fst < snd or fst == snd + end, + __eq = function(fst, snd) + return (snd.line == fst.line and snd.char == fst.char) + end, __tostring = function(self) return "in file " .. self.sourceid .. ", line " .. self.line .. " character " .. self.char end, @@ -17,37 +27,60 @@ local function element(kind, pattern) return Ct(V "anchor" * Cg(Cc(kind), "kind") * pattern) end +local function symbol(value) + return element("symbol", Cg(value, "str")) +end + local function space_tokens(pattern) local token_spacer = S "\t " ^ 0 return pattern * token_spacer end -local function list(pattern) - return element("list", Cg(Ct(space_tokens(pattern)), "elements") * V "endpos") +local function thread_newline(pattern, func) + local list = Cg(Ct(pattern * Cg(Cb("newline_pos"), "newline_pos")), "temp") + -- these are Cmts for perf reasons + * Cmt(Cb("temp"), function(_, _, value) + return true, func(value) + end) + * Cg( + Cmt(Cb("temp"), function(_, _, temp) + return true, temp["newline_pos"] + end), + "newline_pos" + ) + * Cg(Cc(nil), "temp") + + return list end -local function replace_escape(data) - if data == [[\\]] then - return [[\]] - elseif data == [[\"]] then - return [["]] - elseif data == [[\n]] then - return "\n" - elseif data == [[\r]] then - return "\r" - elseif data == [[\t]] then - return "\t" - end +local function list(pattern) + return thread_newline(V "anchor" * space_tokens(pattern) * V "endpos", function(elements) + return { + kind = "list", + elements = elements, + anchor = elements.anchor, + endpos = elements.endpos, + } + end) end -local function string_concat(a, b) - if a and not b then - return a - elseif b and not a then - return b - else - return a .. b +local function create_literal(elements) + local val = {} + + for _, v in ipairs(elements) do + for char in v:gmatch "." do + table.insert(val, string.byte(char)) + end end + + local longstring_val = { + anchor = elements.anchor, + kind = "literal", + literaltype = "bytes", + val = val, + } + + return longstring_val end local grammar = P { @@ -61,13 +94,14 @@ local grammar = P { foreward = Cg(P "" / function(_) return { 0 } end, "indent_level") * Cg(P "" / function() - return { line_num = 1, line_pos = 0 } + return { line_num = 1, line_pos = 1 } end, "newline_pos"), -- every time there's a newline, get it's position. construct a named group with the position -- of the latest (numbered) newline - newline = Cg( - Cmt(Cb("newline_pos") * P "\r" ^ 0 * P "\n" * Cp(), function(body, position, prev_pos) + -- Cmt because of precedence requirements + newline = P "\r" ^ 0 * P "\n" * Cg( + Cmt(Cb("newline_pos") * Cp(), function(_, position, prev_pos) local construct = { line_num = prev_pos["line_num"] + 1, line_pos = position } return true, construct end), @@ -76,24 +110,24 @@ local grammar = P { -- either match the newline or match the beginning of the file filestart = Cg( - Cmt(Cp(), function(body, position, mypos) - if mypos == 1 then - return mypos == 1, { line_num = 0, line_pos = 0 } - else - return mypos == 1 - end + Cmt(Cp(), function(_, _, mypos) + return mypos == 1, { line_num = 1, line_pos = 1 } end), "newline_pos" ), -- every time there's a newline, get it's position, and construct a named group -- subtract the position of the newline and use it as a comparison point - textpos = Cp() * Cb("newline_pos") * lpeg.Carg(1) / function(new_pos, old_pos, filename) + -- complication: we have to get the most recent newline_pos. this includes nested inner values + -- lpeg doesn't have a mechanism natively to support this so we have to pass it up whenever we do this + -- and extract it here + textpos = Cp() * Cb("newline_pos") * lpeg.Carg(1) / function(current_pos, newline_pos, filename) local anchor = { sourceid = filename, - line = old_pos["line_num"], - char = new_pos - old_pos["line_pos"], + line = newline_pos["line_num"], + char = current_pos - newline_pos["line_pos"] + 1, -- obey 1-indexing } + setmetatable(anchor, anchor_mt) return anchor @@ -104,7 +138,7 @@ local grammar = P { anchor = Cg(V "textpos", "anchor"), endpos = Cg(V "textpos", "endpos"), - count_tabs = Cmt(V "textpos" * C(S "\t " ^ 1), function(body, position, anchor, indentstring) + count_tabs = Cmt(V "textpos" * C(S "\t " ^ 1), function(_, _, anchor, indentstring) -- only tabs are allowed -- tabs and spaces must not be interleaved - tabs must happen before spaces. -- TODO: make nice errors in here @@ -128,18 +162,56 @@ local grammar = P { -- an indented block which may have indents and dedents, so long as those indents -- are subordinate to the initial indent - subordinate_indent = V "newline" - * Cmt(Cb("indent_level") * V "count_tabs", function(body, position, prev_indent, this_indent) - return this_indent > prev_indent[#prev_indent] - end), + subordinate_indent = Cmt(Cb("indent_level") * V "count_tabs", function(_, _, prev_indent, this_indent) + local norm_tab_level = this_indent - (prev_indent[#prev_indent] + 1) + local norm_tabs = string.rep("\t", norm_tab_level) + + return this_indent > prev_indent[#prev_indent], norm_tabs + end), + + longstring_body = C((V "unicode_escape" + (1 - (V "newline" + V "splice"))) ^ 1), + longstring_newline = ( + (V "newline" * Cc("\n") * ((Cs(V "subordinate_indent") * V "longstring_body") + (S "\t " ^ 0 * #V "newline"))) + + V "longstring_body" + ), + longstring_literal = thread_newline( + V "anchor" * (V "longstring_body" + V "longstring_newline") ^ 1, + create_literal + ), + + longstring = thread_newline( + V "anchor" * P '""""' * (V "longstring_literal" + V "splice") ^ 0 * V "endpos", + function(elements) + assert(elements) + return { + anchor = elements.anchor, + endpos = elements.endpos, + kind = "string", + elements = elements, + } + end + ), - -- for the time being, accurate recording/reporting of indentation level (indentation level - parent indentation) is unsupported. - contiguous_body = (1 - S "\r\n") ^ 0, - subordinate_body = C(V "contiguous_body") - * (C(V "subordinate_indent" * V "contiguous_body") + (C(V "newline") * S "\t " ^ 0 * #V "newline")) ^ 0, - comment = element("comment", Cg(P "#" * lpeg.Cf(V "subordinate_body", string_concat), "val")), - -- TODO automatically convert body to schema bytes variant - longstring = element("string", Cg(lpeg.Cf((P [[""""]] * V "subordinate_body"), string_concat), "elements")), + comment_body = C((1 - V "newline") ^ 1), + comment = thread_newline( + V "anchor" + * P "#" + * V "comment_body" ^ -1 + * (V "newline" * Cc("\n") * ((Cs(V "subordinate_indent") * V "comment_body") + (S "\t " ^ 0 * #V "newline"))) + ^ 0, + function(elements) + local val = "" + + for _, v in ipairs(elements) do + val = val .. v + end + return { + anchor = elements.anchor, + kind = "comment", + val = val, + } + end + ), -- numbers are limited, they are not bignums, they are standard lua numbers. scopes shares the problem of files not having arbitrary precision -- so it probably doesn't matter. @@ -150,30 +222,31 @@ local grammar = P { ), digit = R("09") ^ 1, hex_digit = (V "digit" + R "AF" + R "af") ^ 1, - decimal = S "-+" ^ 0 * V "digit" * (P "." * V "digit") ^ 0, - hex = S "+-" ^ 0 * P "0x" * V "hex_digit" * (P "." * V "hex_digit") ^ 0, - big_e = V "decimal" * (P "e" * V "decimal") ^ 0, + decimal = S "-+" ^ -1 * V "digit" * (P "." * V "digit") ^ -1, + hex = S "+-" ^ -1 * P "0x" * V "hex_digit" * (P "." * V "hex_digit") ^ -1, + big_e = V "decimal" * (P "e" * V "decimal") ^ -1, float_special = P "+inf" + P "-inf" + P "nan", - symbol = element("symbol", Cg((1 - (S "#;()[]{}," + V "wsp")) ^ 1, "str")), + symbol = symbol((1 - (S "#;()[]{}," + V "wsp")) ^ 1), -- probably works but it doesn't have complex tests splice = P "${" * V "naked_list" * P "}", - backslashes = C(P [[\\]]) / replace_escape, - escape = C(P [[\]] * S [[nrt"]]) / replace_escape, + escape_chars = Cs(P [[\\]] / [[\]] + P [[\"]] / [["]] + P [[\n]] / "\n" + P [[\r]] / "\r" + P [[\t]] / "\t"), unicode_escape = P "\\u" * (V "hex_digit") ^ 4 ^ -4, - string_literal = element( - "literal", - Cg(Ct((C((1 - S [["\]])) + V "escape" + V "backslashes" + V "unicode_escape") ^ 1) / function(chars) - local buffer = {} - for _, element in ipairs(chars) do - table.insert(buffer, string.byte(element)) - end - return buffer - end, "val") * Cg(lpeg.Cc "bytes", "literaltype") + -- for now, longstrings are not going to automatically translate \n into the escape character + string_literal = Ct( + V "anchor" * (V "escape_chars" + V "unicode_escape" + C(1 - (S [["\]] + V "newline" + V "splice"))) ^ 1 + ) / create_literal, + + -- this doesn't work yet + string = thread_newline( + V "anchor" * P '"' * (V "string_literal" + V "splice") ^ 0 * P '"' * V "endpos", + function(elements) + assert(elements) + return { anchor = elements.anchor, endpos = elements.endpos, elements = elements, kind = "string" } + end ), - string = element("string", P '"' * Cg(Ct((V "string_literal" + V "splice") ^ 0), "elements") * P '"' * V "endpos"), tokens = space_tokens( V "comment" + V "function_call" + V "paren_list" + V "longstring" + V "string" + V "number" + V "symbol" @@ -181,12 +254,14 @@ local grammar = P { token_spacer = S "\t " ^ 0, -- LIST SEPARATOR BEHAVIOR IS NOT CONSISTENT BETWEEN BRACED AND NAKED LISTS - base_paren_body = ( - list(V "tokens" ^ 1 * P ";") -- ; control character splits list-up-to-point into child list - + (P "\\" * V "naked_list") -- \ escape char enters naked list mode from inside a paren list. there's probably an edge case here, indentation is going to be wacky + permitted_paren_tokens = ( + (P "\\" * V "naked_list") -- \ escape char enters naked list mode from inside a paren list. there's probably an edge case here, indentation is going to be wacky + V "tokens" - + V "wsp" + + V "newline" + + S "\t " ^ 1 ), + base_paren_body = list(V "permitted_paren_tokens" ^ 1 * P ";") -- ; control character splits list-up-to-point into child list + + V "permitted_paren_tokens", -- the internal of a paren should be -- you can have list separators, that put everything before it into it's own sublist @@ -200,10 +275,19 @@ local grammar = P { ^ 1 * (list(V "base_paren_body" ^ 2) + V "base_paren_body"), + match_paren_open = Cg(C(P "("), "bracetype") + + (Cg(C(P "["), "bracetype") * symbol(Cc("braced_list"))) + + (Cg(C(P "{"), "bracetype") * symbol(Cc("curly-list"))), + match_paren_close = Cmt(Cb("bracetype") * C(S "])}"), function(_, _, bracetype, brace) + local matches = { + ["("] = ")", + ["["] = "]", + ["{"] = "}", + } + return matches[bracetype] == brace + end), paren_body = V "comma_sep_paren_body" + V "base_paren_body" ^ 1, - paren_list = list(P "(" * V "paren_body" * P ")") + list( - element("symbol", Cg(P "[" / "braced_list", "str")) * V "paren_body" * P "]" - ) + list(element("symbol", Cg(P "{" / "curly-list", "str")) * V "paren_body" * P "}"), + paren_list = list(V "match_paren_open" * V "paren_body" ^ 0 * V "match_paren_close"), -- subtly different from the base case -- if there's a set of arguments provided that aren't comma separated, they are automatically interpreted as a child list diff --git a/test.lua b/test.lua index 5aba43f6..f4e767e9 100644 --- a/test.lua +++ b/test.lua @@ -3,6 +3,9 @@ local format = require "format" local inspect = require "inspect" local function simplify_list(list) + if not list then + return nil + end if list.kind == "list" then local simplified_list = {} @@ -18,12 +21,146 @@ local function simplify_list(list) return list.str elseif list.kind == "comment" then return list.val + elseif list.kind == "string" then + local val = "" + + for _, v in ipairs(list.elements) do + for _, c in ipairs(v.val) do + val = val .. string.char(c) + end + end + + return { kind = "string", elements = { val } } else print("error, unsupported value: ", list.kind) end end end +local anchor_mt = { + __lt = function(fst, snd) + return snd.line > fst.line or (snd.line == fst.line and snd.char > fst.char) + end, + __le = function(fst, snd) + return fst < snd or fst == snd + end, + __eq = function(fst, snd) + return (snd.line == fst.line and snd.char == fst.char) + end, + + __tostring = function(self) + return "in file " .. self.sourceid .. ", line " .. self.line .. " character " .. self.char + end, +} + +local function create_anchor(line, char) + local anchor = { + char = char, + line = line, + sourceid = "inline", + } + + setmetatable(anchor, anchor_mt) + return anchor +end + +local function create_list(anchor, endpos, elements) + return { + anchor = anchor, + endpos = endpos, + kind = "list", + elements = elements, + } +end + +local function create_symbol(anchor, symbol) + return { + anchor = anchor, + kind = "symbol", + str = symbol, + } +end + +-- assert that the cursor is always moving forward +local function forward_moving_cursor(element, cursor) + local cursor = cursor -- the moving cursor + + if element.kind == "list" then + if not cursor then + cursor = element.anchor + else + if not (element.anchor >= cursor) then + -- print("failed, ", element.anchor, " >= ", cursor) + return false + end + end + + for i, v in ipairs(element.elements) do + local accumulator, newcursor = forward_moving_cursor(element.elements[i], cursor) + + if not accumulator then + -- print("accumulator failure") + return false + end + if not (newcursor >= cursor) then + -- print("failed, ", newcursor, " >= ", cursor) + return false + end + + cursor = newcursor + end + + if not (element.endpos >= cursor) then + -- print("failed, ", element.endpos, " >= ", cursor) + return false + end + cursor = element.endpos + + return true, cursor + elseif element.kind == "literal" or element.kind == "symbol" or element.kind == "comment" then + -- print(element.anchor, " >= ", cursor, ", ", element.anchor >= cursor) + return element.anchor >= cursor, element.anchor + end + + return true, cursor +end + +local function samelength_testfile_list(text, ast) + local _, num_newlines = text:gsub("\n", "\n") + + if not ((ast.endpos.line == num_newlines) or (ast.endpos.line == (num_newlines + 1))) then + print("ast: ", ast.endpos.line, "num_newlines:", num_newlines) + end + -- print(inspect(ast)) + + -- why is this even necessary?? + return (ast.endpos.line == num_newlines) or (ast.endpos.line == (num_newlines + 1)) +end + +local function compare_list_anchors(actual, expected) + if + (expected.anchor.line == actual.anchor.line and expected.anchor.char == actual.anchor.char) + and (expected.kind == actual.kind) + then + if expected.kind == "list" then + if not (expected.endpos.line == actual.endpos.line and expected.endpos.char == actual.endpos.char) then + print("expected endpos: ", expected.endpos, expected.kind, " actual: ", actual.endpos, actual.kind) + return false + end + + for i, _ in ipairs(expected.elements) do + if not compare_list_anchors(actual.elements[i], expected.elements[i]) then + return false + end + end + end + return true + else + print("expected anchor: ", expected.anchor, expected.kind, " actual: ", actual.anchor, actual.kind) + return false + end +end + function testOldOpensCase() local example = [[ do @@ -109,7 +246,6 @@ function testsinglelist() dump-env number2 - anotherthing something else @@ -163,6 +299,8 @@ a b c for i = 1, #example do local results = format.parse(example[i], "inline") + luaunit.assertTrue(forward_moving_cursor(results)) + luaunit.assertTrue(samelength_testfile_list(example[i], results)) luaunit.assertEquals(simplify_list(results), expected[i]) end end @@ -198,6 +336,8 @@ function testSymbols() for i = 1, #example do local results = format.parse(example[i], "inline") + luaunit.assertTrue(forward_moving_cursor(results)) + luaunit.assertTrue(samelength_testfile_list(example[i], results)) luaunit.assertEquals(simplify_list(results), expected[i]) end end @@ -249,7 +389,9 @@ function testNumbers() for i = 1, #example do local results = format.parse(example[i], "inline") + luaunit.assertTrue(forward_moving_cursor(results)) luaunit.assertEquals(simplify_list(results), expected[i]) + luaunit.assertTrue(samelength_testfile_list(example[i], results)) end end @@ -285,20 +427,21 @@ print for i = 1, #example do local results = format.parse(example[i], "inline") luaunit.assertEquals(simplify_list(results), expected[i]) + luaunit.assertTrue(forward_moving_cursor(results)) + luaunit.assertTrue(samelength_testfile_list(example[i], results)) end end function testempty() local example = [[ - - ]] local expected = {} local parsed = format.parse(example, "inline") luaunit.assertEquals(simplify_list(parsed), expected) + luaunit.assertTrue(samelength_testfile_list(example, parsed)) end function testnakedlist2() @@ -324,6 +467,7 @@ toplevel local results = format.parse(example[i], "inline") local parsed = simplify_list(results) luaunit.assertEquals(parsed, expected[i]) + luaunit.assertTrue(samelength_testfile_list(example[i], results)) end end @@ -344,7 +488,9 @@ function testbracedlist() for i = 1, #example do local results = format.parse(example[i], "inline") + luaunit.assertTrue(forward_moving_cursor(results)) luaunit.assertEquals(simplify_list(results), expected[i]) + luaunit.assertTrue(samelength_testfile_list(example[i], results)) end end @@ -363,13 +509,14 @@ function testcomments() local expected = { { 1, " list of one", 1 }, { - " i am a normal comment created by a normal human\n\tand this comment is intended to be useful\n\t\tsee?\n\n\tall of this is on one line ", + " i am a normal comment created by a normal human\nand this comment is intended to be useful\n\tsee?\n\nall of this is on one line ", }, { " i" }, } for i = 1, #example do local results = format.parse(example[i], "inline") + luaunit.assertTrue(samelength_testfile_list(example[i], results)) luaunit.assertEquals(simplify_list(results), expected[i]) end end @@ -405,19 +552,166 @@ function testcomma() for i = 1, #example do local results = format.parse(example[i], "inline") + -- print(inspect(results)) luaunit.assertEquals(simplify_list(results), expected[i]) + luaunit.assertTrue(samelength_testfile_list(example[i], results)) + end +end + +function testanchors() + local example = { + "hello hi greetings", + [[ +hello + nested +hi +]], + [[ +hello + nested a +hi +]], + + [[ +hello +(this is +a test) +]], + [[ +hello + (this is +a test) +]], + -- [[ + -- toplevel + -- nested hi + -- innerlayer + -- outerlayer]], + -- [[toplevel + -- innerlevel + -- innestlevel]], + } + + local expected = { + create_list(create_anchor(1, 1), create_anchor(1, 19), { + create_list(create_anchor(1, 1), create_anchor(1, 19), { + create_symbol(create_anchor(1, 1), "hello"), + create_symbol(create_anchor(1, 7), "hi"), + create_symbol(create_anchor(1, 10), "greetings"), + }), + }), + + create_list(create_anchor(1, 1), create_anchor(4, 1), { + create_list(create_anchor(1, 1), create_anchor(2, 8), { + create_symbol(create_anchor(1, 1), "hello"), + create_symbol(create_anchor(2, 2), "nested"), + }), + create_symbol(create_anchor(3, 1), "hi"), + }), + + create_list(create_anchor(1, 1), create_anchor(4, 1), { + create_list(create_anchor(1, 1), create_anchor(2, 10), { + create_symbol(create_anchor(1, 1), "hello"), + create_list(create_anchor(2, 2), create_anchor(2, 10), { + create_symbol(create_anchor(2, 2), "nested"), + create_symbol(create_anchor(2, 9), "a"), + }), + }), + create_symbol(create_anchor(3, 1), "hi"), + }), + + create_list(create_anchor(1, 1), create_anchor(4, 1), { + create_symbol(create_anchor(1, 1), "hello"), + create_list(create_anchor(2, 1), create_anchor(3, 8), { + create_symbol(create_anchor(2, 2), "this"), + create_symbol(create_anchor(2, 7), "is"), + create_symbol(create_anchor(3, 1), "a"), + create_symbol(create_anchor(3, 3), "test"), + }), + }), + create_list(create_anchor(1, 1), create_anchor(4, 1), { + create_list(create_anchor(1, 1), create_anchor(4, 1), { + create_symbol(create_anchor(1, 1), "hello"), + create_list(create_anchor(2, 2), create_anchor(3, 8), { + create_symbol(create_anchor(2, 3), "this"), + create_symbol(create_anchor(2, 8), "is"), + create_symbol(create_anchor(3, 1), "a"), + create_symbol(create_anchor(3, 3), "test"), + }), + }), + }), + } + + assert(#expected == #example) + + for i = 1, #example do + local results = format.parse(example[i], "inline") + luaunit.assertEquals(simplify_list(results), simplify_list(expected[i])) + luaunit.assertTrue(compare_list_anchors(results, expected[i])) + luaunit.assertTrue(forward_moving_cursor(results)) + luaunit.assertTrue(samelength_testfile_list(example[i], results)) end end --- function teststrings() --- local example = { --- " \"this is a string ${with a splice} and more text over here\" " --- } +function testlongstring() + local example = { + [[ +let array-type = + intrinsic + """" + local terms = require './terms' + local value = terms.value + local gens = require './terms-generators' + local value_array = gens.declare_array(value) + local id = {name = "array"} + local function mktype(elem) + return value.prim_boxed_type(id, value_array(elem)) + end + return mktype + prim-func-type (T : prim-type) -> (array : prim-type) + +]], + } --- local expected = { --- "this is a string" --- } + local expected = { + { + { + "let", + "array-type", + "=", + { + "intrinsic", + { + kind = "string", + elements = { + "\n" .. [[ +local terms = require './terms' +local value = terms.value +local gens = require './terms-generators' +local value_array = gens.declare_array(value) +local id = {name = "array"} +local function mktype(elem) + return value.prim_boxed_type(id, value_array(elem)) +end +return mktype]], + }, + }, + { "prim-func-type", { "T", ":", "prim-type" }, "->", { "array", ":", "prim-type" } }, + }, + }, + }, + } + + assert(#expected == #example) --- end + for i = 1, #example do + local results = format.parse(example[i], "inline") + -- print("test# ", i) + -- print("results: ", inspect(results)) + luaunit.assertEquals(simplify_list(results), expected[i]) + luaunit.assertTrue(forward_moving_cursor(results)) + luaunit.assertTrue(samelength_testfile_list(example[i], results)) + end +end os.exit(luaunit.LuaUnit.run())