diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e69de29 diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..07fd654 --- /dev/null +++ b/go.mod @@ -0,0 +1,3 @@ +module github.com/xenomote/object_language + +go 1.19 diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..e69de29 diff --git a/main.go b/main.go new file mode 100644 index 0000000..b7cfe35 --- /dev/null +++ b/main.go @@ -0,0 +1,40 @@ +package main + +import ( + "fmt" + "log" + + "github.com/xenomote/object_language/pattern" +) + +func main() { + p, err := pattern.Parse(` + { + "type": "event.source", + "data": { + "metadata": <=x> { + "set": null + } + } + } + `) + if err != nil { + log.Fatalln(err) + } + + b, err := p.Interpret(` + { + "type": "event.source", + "data": { + "metadata": { + "set": "null" + } + } + } + `) + if err != nil { + log.Fatalln(err) + } + + fmt.Println(b) +} diff --git a/pattern/array.go b/pattern/array.go new file mode 100644 index 0000000..707f68f --- /dev/null +++ b/pattern/array.go @@ -0,0 +1,112 @@ +package pattern + +import ( + "encoding/json" + "fmt" +) + +func (a Array) Interpret(s string) (bindings map[string]string, err error) { + return a.Match(s, map[string]string{}) +} + +func (a Array) Match(s string, old_bindings map[string]string) (new_bindings map[string]string, err error) { + new_bindings = map[string]string{} + + bindings := map[string]string{} + for k, v := range old_bindings { + bindings[k] = v + } + + var input []json.RawMessage + err = json.Unmarshal([]byte(s), &input) + if err != nil { + input := "input" + if len(s) < 10 { + input = "\"" + s + "\"" + } + + return nil, fmt.Errorf("%s could not be interpreted as an array: %w", input, err) + } + + for _, definition := range a.Definitions { + index, err := definition.Index.Index() + if err != nil { + return nil, err + } + + prefix := "" + if definition.Index.String() != fmt.Sprint(index) { + prefix = definition.Index.String() + " = " + } + + if index >= len(input) 
{ + if definition.Optional { + continue + } else { + return nil, fmt.Errorf("array was not long enough to contain required index %s%d", prefix, index) + } + } + + value := input[index] + matched_bindings, err := definition.Assignment.Match(string(value), bindings) + if err != nil { + return nil, fmt.Errorf("could not match index %s%d: %s", prefix, index, err) + } + + for k, v := range matched_bindings { + if _, k_exists := bindings[k]; k_exists { + return nil, fmt.Errorf("binding for %s already exists and cannot be overwritten", k) + } + + bindings[k] = v + new_bindings[k] = v + } + } + + return new_bindings, nil +} + +func (a Array) Validate(bindings map[string]bool) error { + indices := make(map[string]bool) + for _, d := range a.Definitions { + if _, exists := indices[d.Index.String()]; exists { + return fmt.Errorf("duplicate index %s", d.Index.String()) + } + + indices[d.Index.String()] = true + } + + for _, d := range a.Definitions { + if err := d.Validate(bindings); err != nil { + return fmt.Errorf("at index %s: %s", d.Index, err) + } + } + + return nil +} + +func (a Array) String() string { + s := "[" + + for i, definition := range a.Definitions { + s += "\n" + indent(definition.String()) + + if i < len(a.Definitions)-1 { + s += "," + } else { + s += "\n" + } + } + + s += "]" + + return s +} + +func (o ArrayDefinition) String() string { + op := "" + if o.Optional { + op = "?" 
+ } + return o.Index.String() + op + ": " + o.Assignment.String() +} diff --git a/pattern/binding.go b/pattern/binding.go new file mode 100644 index 0000000..80104b0 --- /dev/null +++ b/pattern/binding.go @@ -0,0 +1,20 @@ +package pattern + +import "fmt" + +func (b Binding) Match(s string, bindings map[string]string) (map[string]string, error) { + return map[string]string{string(b): s}, nil +} + +func (b Binding) Validate(bindings map[string]bool) error { + if _, exists := bindings[string(b)]; exists { + return fmt.Errorf("duplicate binding %s", b) + } + + bindings[string(b)] = true + return nil +} + +func (b Binding) String() string { + return "<=" + string(b) + ">" +} diff --git a/pattern/grammar.go b/pattern/grammar.go new file mode 100644 index 0000000..6ab4ab4 --- /dev/null +++ b/pattern/grammar.go @@ -0,0 +1,689 @@ +// Code generated by goyacc -o grammar.go grammar.y. DO NOT EDIT. + +//line grammar.y:1 + +package pattern + +import __yyfmt__ "fmt" + +//line grammar.y:3 + +//line grammar.y:5 +type yySymType struct { + yys int + num float64 + str string + + pattern Pattern + arrdef ArrayDefinition + arrdefl []ArrayDefinition + objdef ObjectDefinition + objdefl []ObjectDefinition + opid OptionalIdentifier + opidl []OptionalIdentifier + ass Assignment + ref Reference + ind Index + key Key + bnd Binding +} + +const NULL = 57346 +const TRUE = 57347 +const FALSE = 57348 +const NUMBER = 57349 +const STRING = 57350 +const IDENTIFIER = 57351 + +var yyToknames = [...]string{ + "$end", + "error", + "$unk", + "NULL", + "TRUE", + "FALSE", + "NUMBER", + "STRING", + "IDENTIFIER", + "'['", + "']'", + "','", + "':'", + "'?'", + "'{'", + "'}'", + "'<'", + "'='", + "'>'", + "'.'", +} + +var yyStatenames = [...]string{} + +const yyEofCode = 1 +const yyErrCode = 2 +const yyInitialStackSize = 16 + +//line yacctab:1 +var yyExca = [...]int8{ + -1, 1, + 1, -1, + -2, 0, +} + +const yyPrivate = 57344 + +const yyLast = 65 + +var yyAct = [...]int8{ + 44, 25, 27, 29, 32, 33, 31, 30, 52, 4, 
+ 13, 49, 50, 45, 5, 8, 28, 29, 32, 33, + 31, 30, 42, 4, 38, 51, 15, 21, 5, 40, + 41, 20, 37, 24, 11, 4, 22, 23, 46, 39, + 5, 47, 18, 19, 36, 16, 17, 45, 10, 48, + 15, 53, 6, 10, 35, 1, 43, 34, 26, 14, + 9, 12, 7, 3, 2, +} + +var yyPact = [...]int16{ + 25, -1000, -1000, -1000, 41, 18, -1000, 34, -1000, 29, + -1000, -1000, 15, -1000, 23, -1000, -1000, 46, -1, 31, + -1000, 42, -1, 26, -1000, -1000, 13, -1000, 4, -1000, + -1000, -1000, -1000, -1000, -1000, -1000, -1, -1000, -1000, -1, + -1000, 38, 40, -8, -1000, 11, -1000, -1000, -11, -1000, + 38, -1000, -1000, -1000, +} + +var yyPgo = [...]int8{ + 0, 64, 63, 54, 1, 2, 15, 62, 10, 61, + 60, 59, 58, 57, 0, 56, +} + +var yyR1 = [...]int8{ + 0, 3, 3, 1, 1, 7, 7, 6, 6, 2, + 2, 9, 9, 8, 8, 10, 11, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 12, 13, 15, + 15, 14, 14, +} + +var yyR2 = [...]int8{ + 0, 1, 1, 2, 3, 1, 3, 3, 4, 2, + 3, 1, 3, 3, 4, 1, 1, 1, 1, 2, + 1, 1, 1, 1, 1, 1, 1, 4, 3, 1, + 3, 1, 2, +} + +var yyChk = [...]int16{ + -1000, -3, -1, -2, 10, 15, 11, -7, -6, -10, + 7, 16, -9, -8, -11, 8, 11, 12, 13, 14, + 16, 12, 13, 14, -6, -4, -12, -5, 17, 4, + 8, 7, 5, 6, -13, -3, 13, -8, -4, 13, + -5, 17, 18, -15, -14, 9, -4, -4, 9, 19, + 20, 14, 19, -14, +} + +var yyDef = [...]int8{ + 0, -2, 1, 2, 0, 0, 3, 0, 5, 0, + 15, 9, 0, 11, 0, 16, 4, 0, 0, 0, + 10, 0, 0, 0, 6, 7, 17, 18, 0, 20, + 21, 22, 23, 24, 25, 26, 0, 12, 13, 0, + 19, 0, 0, 0, 29, 31, 8, 14, 0, 28, + 0, 32, 27, 30, +} + +var yyTok1 = [...]int8{ + 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 12, 3, 20, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 13, 3, + 17, 18, 19, 14, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 10, 3, 11, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 15, 3, 16, +} + +var yyTok2 = [...]int8{ + 2, 3, 4, 5, 6, 7, 8, 9, +} + +var yyTok3 = [...]int8{ + 0, +} + +var yyErrorMessages = 
[...]struct { + state int + token int + msg string +}{} + +//line yaccpar:1 + +/* parser for yacc output */ + +var ( + yyDebug = 0 + yyErrorVerbose = true +) + +type yyLexer interface { + Lex(lval *yySymType) int + Error(s string) +} + +type yyParser interface { + Parse(yyLexer) int + Lookahead() int +} + +type yyParserImpl struct { + lval yySymType + stack [yyInitialStackSize]yySymType + char int +} + +func (p *yyParserImpl) Lookahead() int { + return p.char +} + +func yyNewParser() yyParser { + return &yyParserImpl{} +} + +const yyFlag = -1000 + +func yyTokname(c int) string { + if c >= 1 && c-1 < len(yyToknames) { + if yyToknames[c-1] != "" { + return yyToknames[c-1] + } + } + return __yyfmt__.Sprintf("tok-%v", c) +} + +func yyStatname(s int) string { + if s >= 0 && s < len(yyStatenames) { + if yyStatenames[s] != "" { + return yyStatenames[s] + } + } + return __yyfmt__.Sprintf("state-%v", s) +} + +func yyErrorMessage(state, lookAhead int) string { + const TOKSTART = 4 + + if !yyErrorVerbose { + return "syntax error" + } + + for _, e := range yyErrorMessages { + if e.state == state && e.token == lookAhead { + return "syntax error: " + e.msg + } + } + + res := "syntax error: unexpected " + yyTokname(lookAhead) + + // To match Bison, suggest at most four expected tokens. + expected := make([]int, 0, 4) + + // Look for shiftable tokens. + base := int(yyPact[state]) + for tok := TOKSTART; tok-1 < len(yyToknames); tok++ { + if n := base + tok; n >= 0 && n < yyLast && int(yyChk[int(yyAct[n])]) == tok { + if len(expected) == cap(expected) { + return res + } + expected = append(expected, tok) + } + } + + if yyDef[state] == -2 { + i := 0 + for yyExca[i] != -1 || int(yyExca[i+1]) != state { + i += 2 + } + + // Look for tokens that we accept or reduce. 
+ for i += 2; yyExca[i] >= 0; i += 2 { + tok := int(yyExca[i]) + if tok < TOKSTART || yyExca[i+1] == 0 { + continue + } + if len(expected) == cap(expected) { + return res + } + expected = append(expected, tok) + } + + // If the default action is to accept or reduce, give up. + if yyExca[i+1] != 0 { + return res + } + } + + for i, tok := range expected { + if i == 0 { + res += ", expecting " + } else { + res += " or " + } + res += yyTokname(tok) + } + return res +} + +func yylex1(lex yyLexer, lval *yySymType) (char, token int) { + token = 0 + char = lex.Lex(lval) + if char <= 0 { + token = int(yyTok1[0]) + goto out + } + if char < len(yyTok1) { + token = int(yyTok1[char]) + goto out + } + if char >= yyPrivate { + if char < yyPrivate+len(yyTok2) { + token = int(yyTok2[char-yyPrivate]) + goto out + } + } + for i := 0; i < len(yyTok3); i += 2 { + token = int(yyTok3[i+0]) + if token == char { + token = int(yyTok3[i+1]) + goto out + } + } + +out: + if token == 0 { + token = int(yyTok2[1]) /* unknown char */ + } + if yyDebug >= 3 { + __yyfmt__.Printf("lex %s(%d)\n", yyTokname(token), uint(char)) + } + return char, token +} + +func yyParse(yylex yyLexer) int { + return yyNewParser().Parse(yylex) +} + +func (yyrcvr *yyParserImpl) Parse(yylex yyLexer) int { + var yyn int + var yyVAL yySymType + var yyDollar []yySymType + _ = yyDollar // silence set and not used + yyS := yyrcvr.stack[:] + + Nerrs := 0 /* number of errors */ + Errflag := 0 /* error recovery flag */ + yystate := 0 + yyrcvr.char = -1 + yytoken := -1 // yyrcvr.char translated into internal numbering + defer func() { + // Make sure we report no lookahead when not parsing. 
+ yystate = -1 + yyrcvr.char = -1 + yytoken = -1 + }() + yyp := -1 + goto yystack + +ret0: + return 0 + +ret1: + return 1 + +yystack: + /* put a state and value onto the stack */ + if yyDebug >= 4 { + __yyfmt__.Printf("char %v in %v\n", yyTokname(yytoken), yyStatname(yystate)) + } + + yyp++ + if yyp >= len(yyS) { + nyys := make([]yySymType, len(yyS)*2) + copy(nyys, yyS) + yyS = nyys + } + yyS[yyp] = yyVAL + yyS[yyp].yys = yystate + +yynewstate: + yyn = int(yyPact[yystate]) + if yyn <= yyFlag { + goto yydefault /* simple state */ + } + if yyrcvr.char < 0 { + yyrcvr.char, yytoken = yylex1(yylex, &yyrcvr.lval) + } + yyn += yytoken + if yyn < 0 || yyn >= yyLast { + goto yydefault + } + yyn = int(yyAct[yyn]) + if int(yyChk[yyn]) == yytoken { /* valid shift */ + yyrcvr.char = -1 + yytoken = -1 + yyVAL = yyrcvr.lval + yystate = yyn + if Errflag > 0 { + Errflag-- + } + goto yystack + } + +yydefault: + /* default state action */ + yyn = int(yyDef[yystate]) + if yyn == -2 { + if yyrcvr.char < 0 { + yyrcvr.char, yytoken = yylex1(yylex, &yyrcvr.lval) + } + + /* look through exception table */ + xi := 0 + for { + if yyExca[xi+0] == -1 && int(yyExca[xi+1]) == yystate { + break + } + xi += 2 + } + for xi += 2; ; xi += 2 { + yyn = int(yyExca[xi+0]) + if yyn < 0 || yyn == yytoken { + break + } + } + yyn = int(yyExca[xi+1]) + if yyn < 0 { + goto ret0 + } + } + if yyn == 0 { + /* error ... attempt to resume parsing */ + switch Errflag { + case 0: /* brand new error */ + yylex.Error(yyErrorMessage(yystate, yytoken)) + Nerrs++ + if yyDebug >= 1 { + __yyfmt__.Printf("%s", yyStatname(yystate)) + __yyfmt__.Printf(" saw %s\n", yyTokname(yytoken)) + } + fallthrough + + case 1, 2: /* incompletely recovered error ... 
try again */ + Errflag = 3 + + /* find a state where "error" is a legal shift action */ + for yyp >= 0 { + yyn = int(yyPact[yyS[yyp].yys]) + yyErrCode + if yyn >= 0 && yyn < yyLast { + yystate = int(yyAct[yyn]) /* simulate a shift of "error" */ + if int(yyChk[yystate]) == yyErrCode { + goto yystack + } + } + + /* the current p has no shift on "error", pop stack */ + if yyDebug >= 2 { + __yyfmt__.Printf("error recovery pops state %d\n", yyS[yyp].yys) + } + yyp-- + } + /* there is no state on the stack with an error shift ... abort */ + goto ret1 + + case 3: /* no shift yet; clobber input char */ + if yyDebug >= 2 { + __yyfmt__.Printf("error recovery discards %s\n", yyTokname(yytoken)) + } + if yytoken == yyEofCode { + goto ret1 + } + yyrcvr.char = -1 + yytoken = -1 + goto yynewstate /* try again in the same state */ + } + } + + /* reduction by production yyn */ + if yyDebug >= 2 { + __yyfmt__.Printf("reduce %v in:\n\t%v\n", yyn, yyStatname(yystate)) + } + + yynt := yyn + yypt := yyp + _ = yypt // guard against "declared and not used" + + yyp -= int(yyR2[yyn]) + // yyp is now the index of $0. Perform the default action. Iff the + // reduced production is ε, $1 is possibly out of range. 
+ if yyp+1 >= len(yyS) { + nyys := make([]yySymType, len(yyS)*2) + copy(nyys, yyS) + yyS = nyys + } + yyVAL = yyS[yyp+1] + + /* consult goto table to find next state */ + yyn = int(yyR1[yyn]) + yyg := int(yyPgo[yyn]) + yyj := yyg + yyS[yyp].yys + 1 + + if yyj >= yyLast { + yystate = int(yyAct[yyg]) + } else { + yystate = int(yyAct[yyj]) + if int(yyChk[yystate]) != -yyn { + yystate = int(yyAct[yyg]) + } + } + // dummy call; replaced with literal code + switch yynt { + + case 1: + yyDollar = yyS[yypt-1 : yypt+1] +//line grammar.y:46 + { + yylex.(*lex).out = yyDollar[1].pattern + } + case 2: + yyDollar = yyS[yypt-1 : yypt+1] +//line grammar.y:47 + { + yylex.(*lex).out = yyDollar[1].pattern + } + case 3: + yyDollar = yyS[yypt-2 : yypt+1] +//line grammar.y:50 + { + yyVAL.pattern = Array{} + } + case 4: + yyDollar = yyS[yypt-3 : yypt+1] +//line grammar.y:51 + { + yyVAL.pattern = Array{yyDollar[2].arrdefl} + } + case 5: + yyDollar = yyS[yypt-1 : yypt+1] +//line grammar.y:54 + { + yyVAL.arrdefl = []ArrayDefinition{yyDollar[1].arrdef} + } + case 6: + yyDollar = yyS[yypt-3 : yypt+1] +//line grammar.y:55 + { + yyVAL.arrdefl = append(yyDollar[1].arrdefl, yyDollar[3].arrdef) + } + case 7: + yyDollar = yyS[yypt-3 : yypt+1] +//line grammar.y:58 + { + yyVAL.arrdef = ArrayDefinition{Index: yyDollar[1].ind, Optional: false, Assignment: yyDollar[3].ass} + } + case 8: + yyDollar = yyS[yypt-4 : yypt+1] +//line grammar.y:59 + { + yyVAL.arrdef = ArrayDefinition{Index: yyDollar[1].ind, Optional: true, Assignment: yyDollar[4].ass} + } + case 9: + yyDollar = yyS[yypt-2 : yypt+1] +//line grammar.y:62 + { + yyVAL.pattern = Object{} + } + case 10: + yyDollar = yyS[yypt-3 : yypt+1] +//line grammar.y:63 + { + yyVAL.pattern = Object{yyDollar[2].objdefl} + } + case 11: + yyDollar = yyS[yypt-1 : yypt+1] +//line grammar.y:66 + { + yyVAL.objdefl = []ObjectDefinition{yyDollar[1].objdef} + } + case 12: + yyDollar = yyS[yypt-3 : yypt+1] +//line grammar.y:67 + { + yyVAL.objdefl = 
append(yyDollar[1].objdefl, yyDollar[3].objdef) + } + case 13: + yyDollar = yyS[yypt-3 : yypt+1] +//line grammar.y:70 + { + yyVAL.objdef = ObjectDefinition{Key: yyDollar[1].key, Optional: false, Assignment: yyDollar[3].ass} + } + case 14: + yyDollar = yyS[yypt-4 : yypt+1] +//line grammar.y:71 + { + yyVAL.objdef = ObjectDefinition{Key: yyDollar[1].key, Optional: true, Assignment: yyDollar[4].ass} + } + case 15: + yyDollar = yyS[yypt-1 : yypt+1] +//line grammar.y:74 + { + yyVAL.ind = Number(yyDollar[1].num) + } + case 16: + yyDollar = yyS[yypt-1 : yypt+1] +//line grammar.y:78 + { + yyVAL.key = String(yyDollar[1].str) + } + case 17: + yyDollar = yyS[yypt-1 : yypt+1] +//line grammar.y:82 + { + yyVAL.ass = yyDollar[1].bnd + } + case 18: + yyDollar = yyS[yypt-1 : yypt+1] +//line grammar.y:83 + { + yyVAL.ass = yyDollar[1].ass + } + case 19: + yyDollar = yyS[yypt-2 : yypt+1] +//line grammar.y:84 + { + yyVAL.ass = BoundLiteral{yyDollar[1].bnd, Assignment(yyDollar[2].ass)} + } + case 20: + yyDollar = yyS[yypt-1 : yypt+1] +//line grammar.y:87 + { + yyVAL.ass = Null{} + } + case 21: + yyDollar = yyS[yypt-1 : yypt+1] +//line grammar.y:88 + { + yyVAL.ass = String(yyDollar[1].str) + } + case 22: + yyDollar = yyS[yypt-1 : yypt+1] +//line grammar.y:89 + { + yyVAL.ass = Number(yyDollar[1].num) + } + case 23: + yyDollar = yyS[yypt-1 : yypt+1] +//line grammar.y:90 + { + yyVAL.ass = Boolean(true) + } + case 24: + yyDollar = yyS[yypt-1 : yypt+1] +//line grammar.y:91 + { + yyVAL.ass = Boolean(false) + } + case 25: + yyDollar = yyS[yypt-1 : yypt+1] +//line grammar.y:93 + { + yyVAL.ass = yyDollar[1].ref + } + case 26: + yyDollar = yyS[yypt-1 : yypt+1] +//line grammar.y:94 + { + yyVAL.ass = yyDollar[1].pattern + } + case 27: + yyDollar = yyS[yypt-4 : yypt+1] +//line grammar.y:97 + { + yyVAL.bnd = Binding(yyDollar[3].str) + } + case 28: + yyDollar = yyS[yypt-3 : yypt+1] +//line grammar.y:100 + { + yyVAL.ref = Reference(yyDollar[2].opidl) + } + case 29: + yyDollar = yyS[yypt-1 : yypt+1] 
+//line grammar.y:103 + { + yyVAL.opidl = []OptionalIdentifier{yyDollar[1].opid} + } + case 30: + yyDollar = yyS[yypt-3 : yypt+1] +//line grammar.y:104 + { + yyVAL.opidl = append(yyDollar[1].opidl, yyDollar[3].opid) + } + case 31: + yyDollar = yyS[yypt-1 : yypt+1] +//line grammar.y:108 + { + yyVAL.opid = OptionalIdentifier{Identifier: Identifier(yyDollar[1].str), Optional: false} + } + case 32: + yyDollar = yyS[yypt-2 : yypt+1] +//line grammar.y:109 + { + yyVAL.opid = OptionalIdentifier{Identifier: Identifier(yyDollar[1].str), Optional: true} + } + } + goto yystack /* stack new state and value */ +} diff --git a/pattern/grammar.y b/pattern/grammar.y new file mode 100644 index 0000000..dba5620 --- /dev/null +++ b/pattern/grammar.y @@ -0,0 +1,109 @@ +%{ +package pattern +%} + +%union{ + num float64 + str string + + pattern Pattern + arrdef ArrayDefinition + arrdefl []ArrayDefinition + objdef ObjectDefinition + objdefl []ObjectDefinition + opid OptionalIdentifier + opidl []OptionalIdentifier + ass Assignment + ref Reference + ind Index + key Key + bnd Binding +} + +/* Each type tag names the %union field that carries the symbol's value. */ +%token NULL TRUE FALSE +%token <num> NUMBER +%token <str> STRING IDENTIFIER + +%type <pattern> array object pattern +%type <ass> assignment literal +%type <arrdef> array_definition +%type <arrdefl> array_definition_list +%type <objdef> object_definition +%type <objdefl> object_definition_list +%type <ind> index +%type <key> key +%type <bnd> binding +%type <ref> reference +%type <opid> optional_identifier +%type <opidl> optional_identifier_list + + +%start pattern + +%% + +pattern + : array { yylex.(*lex).out = $1 } + | object { yylex.(*lex).out = $1 } + +array + : '[' ']' { $$ = Array{} } + | '[' array_definition_list ']' { $$ = Array{$2} } + +array_definition_list + : array_definition { $$ = []ArrayDefinition{$1} } + | array_definition_list ',' array_definition { $$ = append($1, $3) } + +array_definition + : index ':' assignment { $$ = ArrayDefinition{Index: $1, Optional: false, Assignment: $3} } + | index '?' 
':' assignment { $$ = ArrayDefinition{Index: $1, Optional: true, Assignment: $4} } + +object + : '{' '}' { $$ = Object{} } + | '{' object_definition_list '}' { $$ = Object{$2} } + +object_definition_list + : object_definition { $$ = []ObjectDefinition{$1} } + | object_definition_list ',' object_definition { $$ = append($1, $3) } + +object_definition + : key ':' assignment { $$ = ObjectDefinition{Key: $1, Optional: false, Assignment: $3} } + | key '?' ':' assignment { $$ = ObjectDefinition{Key: $1, Optional: true, Assignment: $4} } + +index + : NUMBER { $$ = Number($1) } + /* | reference { $$ = $1 } */ + +key + : STRING { $$ = String($1) } + /* | reference { $$ = $1 } */ + +assignment + : binding { $$ = $1 } + | literal { $$ = $1 } + | binding literal { $$ = BoundLiteral{$1, Assignment($2)} } + +literal + : NULL { $$ = Null{} } + | TRUE { $$ = Boolean(true) } + | FALSE { $$ = Boolean(false) } + | STRING { $$ = String($1) } + | NUMBER { $$ = Number($1) } + | reference { $$ = $1 } + | pattern { $$ = $1 } + +binding + : '<' '=' IDENTIFIER '>' { $$ = Binding($3) } + +reference + : '<' optional_identifier_list '>' { $$ = Reference($2) } + +optional_identifier_list + : optional_identifier { $$ = []OptionalIdentifier{$1} } + | optional_identifier_list '.' optional_identifier { $$ = append($1, $3) } + + +optional_identifier + : IDENTIFIER { $$ = OptionalIdentifier{Identifier: Identifier($1), Optional: false} } + | IDENTIFIER '?' 
{ $$ = OptionalIdentifier{Identifier: Identifier($1), Optional: true} } \ No newline at end of file diff --git a/pattern/object.go b/pattern/object.go new file mode 100644 index 0000000..eaa63fd --- /dev/null +++ b/pattern/object.go @@ -0,0 +1,113 @@ +package pattern + +import ( + "encoding/json" + "fmt" +) + +func (o Object) Validate(bindings map[string]bool) error { + keys := make(map[string]bool) + for _, d := range o.Definitions { + key := d.Key.String() + if _, exists := keys[key]; exists { + return fmt.Errorf("duplicate key %s", key) + } + + keys[key] = true + } + + for _, d := range o.Definitions { + if err := d.Validate(bindings); err != nil { + return fmt.Errorf("at key %s: %s", d.Key, err) + } + } + + return nil +} + +func (o Object) Interpret(s string) (bindings map[string]string, err error) { + return o.Match(s, map[string]string{}) +} + +func (o Object) Match(s string, old_bindings map[string]string) (new_bindings map[string]string, err error) { + new_bindings = map[string]string{} + + bindings := map[string]string{} + for k, v := range old_bindings { + bindings[k] = v + } + + var input map[string]json.RawMessage + err = json.Unmarshal([]byte(s), &input) + if err != nil { + input := "input" + if len(s) < 10 { + input = "\"" + s + "\"" + } + + return nil, fmt.Errorf("%s could not be interpreted as an object: %w", input, err) + } + + for _, definition := range o.Definitions { + key, err := definition.Key.Key() + if err != nil { + return nil, err + } + + prefix := "" + if definition.Key.String() != key { + prefix = definition.Key.String() + " = " + } + + value, key_exists := input[key] + if !key_exists { + if definition.Optional { + continue + } else { + return nil, fmt.Errorf("object did not contain required field %s\"%s\"", prefix, key) + } + } + + matched, err := definition.Assignment.Match(string(value), bindings) + if err != nil { + return nil, fmt.Errorf("could not match field %s\"%s\": %s", prefix, key, err) + } + + for k, v := range matched { + if 
_, k_exists := bindings[k]; k_exists { + return nil, fmt.Errorf("binding for %s already exists and cannot be overwritten", k) + } + + bindings[k] = v + new_bindings[k] = v + } + } + + return new_bindings, nil +} + +func (o Object) String() string { + s := "{" + + for i, definition := range o.Definitions { + s += "\n" + indent(definition.String()) + + if i < len(o.Definitions)-1 { + s += "," + } else { + s += "\n" + } + } + + s += "}" + + return s +} + +func (o ObjectDefinition) String() string { + op := "" + if o.Optional { + op = "?" + } + return o.Key.String() + op + ": " + o.Assignment.String() +} diff --git a/pattern/parser.go b/pattern/parser.go new file mode 100644 index 0000000..cd025a1 --- /dev/null +++ b/pattern/parser.go @@ -0,0 +1,170 @@ +package pattern + +import ( + "fmt" + "strconv" + "strings" + "unicode" +) + +//go:generate goyacc -o grammar.go grammar.y + +const EOF = 0 + +type lex struct { + input []rune + i int + + out Pattern + err error +} + +// Error records a parse error reported by the generated parser; a later +// message is appended to the ones already recorded. +func (l *lex) Error(s string) { + if l.err == nil { + // s is a message, not a format string, so it must not be used as one. + l.err = fmt.Errorf("%s", s) + } else { + l.err = fmt.Errorf("%s: %s", l.err, s) + } +} + +// Lex skips whitespace and returns the next token for the generated parser, +// storing the value of numbers, strings and identifiers in lval. +func (l *lex) Lex(lval *yySymType) int { + for unicode.IsSpace(l.next()) && l.next() != EOF { + l.take() + } + + switch l.next() { + // '.' separates the identifiers of a reference such as <a.b>. + case '[', ']', '{', '}', ':', ',', '<', '>', '=', '?', '.', EOF: + return int(l.take()) + case '0', '9', '8', '7', '6', '5', '4', '3', '2', '1': + return l.num(lval) + case '"': + return l.str(lval) + default: + if l.match("true") { + return TRUE + } + + if l.match("false") { + return FALSE + } + + if l.match("null") { + return NULL + } + + if unicode.IsLetter(l.next()) { + return l.ident(lval) + } + + l.Error(fmt.Sprintf("unrecognised character %c", l.next())) + return yyErrCode + } +} + +func (l *lex) at(i int) rune { + if l.i+i < len(l.input) { + return l.input[l.i+i] + } + + return EOF +} + +func (l *lex) next() rune { + return l.at(0) +} + +func (l *lex) take() rune { + c := l.next() + l.i++ + return c +} + +func (l *lex) match(s string) 
bool { + if len(s) > len(l.input)-l.i { + return false + } + + for i, r := range s { + if r != l.at(i) { + return false + } + } + + l.i += len(s) + return true +} + +func (l *lex) num(lval *yySymType) int { + var s strings.Builder + s.WriteRune(l.take()) + + for unicode.IsNumber(l.next()) { + s.WriteRune(l.take()) + } + + if unicode.IsLetter(l.next()) { + l.Error(fmt.Sprintf("unexpected character %c in number", l.next())) + return yyErrCode + } + + n, err := strconv.ParseFloat(s.String(), 64) + if err != nil { + l.Error(err.Error()) + return yyErrCode + } + lval.num = n + + return NUMBER +} + +func (l *lex) str(lval *yySymType) int { + l.take() + var s strings.Builder + + for l.next() != '"' && l.next() != EOF { + s.WriteRune(l.take()) + } + + if l.next() != '"' { + l.Error("improperly terminated string, reached EOF") + return yyErrCode + } + l.take() + lval.str = s.String() + + return STRING +} + +func (l *lex) ident(lval *yySymType) int { + var s strings.Builder + s.WriteRune(l.take()) + + for unicode.IsLetter(l.next()) || unicode.IsNumber(l.next()) { + s.WriteRune(l.take()) + } + + lval.str = s.String() + + return IDENTIFIER +} + +func Parse(s string) (Pattern, error) { + l := lex{input: []rune(s)} + + if yyParse(&l) != 0 { + return nil, l.err + } + + err := l.out.Validate(map[string]bool{}) + if err != nil { + return nil, err + } + + return l.out, nil +} diff --git a/pattern/pattern.go b/pattern/pattern.go new file mode 100644 index 0000000..6561e27 --- /dev/null +++ b/pattern/pattern.go @@ -0,0 +1,68 @@ +package pattern + +import "strings" + +type Pattern interface { + Interpret(string) (map[string]string, error) + Assignment +} + +type Object struct { + Definitions []ObjectDefinition +} + +type ObjectDefinition struct { + Key Key + Optional bool + Assignment +} + +type Array struct { + Definitions []ArrayDefinition +} + +type ArrayDefinition struct { + Index Index + Optional bool + Assignment +} + +type Key interface { + Key() (string, error) + String() 
string +} +type Index interface { + Index() (int, error) + String() string +} + +type Assignment interface { + Match(string, map[string]string) (map[string]string, error) + Validate(bindings map[string]bool) error + String() string +} + +type Null struct{} +type Identifier string +type String string +type Number float64 +type Boolean bool + +type Binding Identifier +type Reference []OptionalIdentifier + +type BoundLiteral struct { + Binding + Assignment +} + +type OptionalIdentifier struct { + Identifier + Optional bool +} + +const INDENT = " " + +func indent(s string) string { + return INDENT + strings.ReplaceAll(s, "\n", "\n"+INDENT) +} diff --git a/pattern/pattern_test.go b/pattern/pattern_test.go new file mode 100644 index 0000000..073a2e9 --- /dev/null +++ b/pattern/pattern_test.go @@ -0,0 +1,173 @@ +package pattern_test + +import ( + "testing" + + "github.com/xenomote/object_language/pattern" +) + +func TestParse(t *testing.T) { + tests := []struct { + name string + input string + shouldParse bool + }{ + {"empty object", `{}`, true}, + {"empty array", `[]`, true}, + + {"object with one field", `{"a": 123}`, true}, + {"object with one binding", `{"a": <=x>}`, true}, + {"object with bound field", `{"a": <=x> 123}`, true}, + + {"object with optional field", `{"a"?: 123}`, true}, + {"object with optional binding", `{"a"?: <=x>}`, true}, + {"object with optional bound field", `{"a"?: <=x> 123}`, true}, + + {"object with reference", `{"a": <=x>, "b": <x>}`, true}, + {"object with self reference", `{"a": <=x> <x>}`, false}, + {"object with nested reference", `{"a": {"b": <=x>}, "c": <x>}`, true}, + {"object with nested self reference", `{"a": <=x> {"b": <x>}}`, false}, + + {"object with nested object", `{"a": {}}`, true}, + {"object with nested array", `{"a": []}`, true}, + + {"object with duplicate key", `{"a": 123, "a": 456}`, false}, + {"object with numeric key", `{1: 123}`, false}, + + {"array with one field", `[0: 1]`, true}, + {"array with one binding", `[0: <=x>]`, 
true}, + {"array with bound field", `[0: <=x> 1]`, true}, + + {"array with reference", `[0: <=x>, 1: <x>]`, true}, + {"array with self reference", `[0: <=x> <x>]`, false}, + {"array with nested reference", `[0: [0: <=x>], 1: <x>]`, true}, + {"array with nested self reference", `[0: <=x> [0: <x>]]`, false}, + + {"array with optional field", `[0?: 1]`, true}, + {"array with optional binding", `[0?: <=x>]`, true}, + {"array with optional bound field", `[0?: <=x> 1]`, true}, + + {"array with nested object", `[0: {}]`, true}, + {"array with nested array", `[0: []]`, true}, + + {"array with duplicate index", `[0: 1, 0: 2]`, false}, + {"array with string index", `["a": 123]`, false}, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + _, err := pattern.Parse(test.input) + parsed := (err == nil) + if parsed != test.shouldParse { + if test.shouldParse { + t.Fatalf("%s failed to parse: %s", test.name, err) + } else { + t.Fatalf("%s should not have parsed", test.name) + } + + } + }) + } +} + +func TestInterpret(t *testing.T) { + tests := []struct { + pattern string + input string + shouldMatch bool + output map[string]string + }{ + {`{}`, `[]`, false, nil}, + {`[]`, `{}`, false, nil}, + + {`{}`, `{}`, true, nil}, + {`{}`, `{"a": 1, "b": 2, "c": 3}`, true, nil}, + + {`{"a": 1}`, `{}`, false, nil}, + {`{"a": 1}`, `{"b": 1}`, false, nil}, + {`{"a": 1}`, `{"a": 2}`, false, nil}, + {`{"a": 1}`, `{"a": 1}`, true, nil}, + + {`{"a"?: 1}`, `{}`, true, nil}, + {`{"a"?: 1}`, `{"a": 1}`, true, nil}, + {`{"a"?: 1}`, `{"a": 2}`, false, nil}, + + {`{"a": <=x>}`, `{"a": 1}`, true, map[string]string{"x": "1"}}, + {`{"a": <=x>, "b": <=y>}`, `{"a": 1, "b": 2}`, true, map[string]string{"x": "1", "y": "2"}}, + {`{"a": <=x>, "b": <=y>}`, `{"b": 2, "a": 1}`, true, map[string]string{"x": "1", "y": "2"}}, + {`{"b": <=y>, "a": <=x>}`, `{"a": 1, "b": 2}`, true, map[string]string{"x": "1", "y": "2"}}, + {`{"b": <=y>, "a": <=x>}`, `{"b": 2, "a": 1}`, true, map[string]string{"x": "1", "y": 
"2"}}, + + {`{"a"?: <=x>}`, `{"a": 1}`, true, map[string]string{"x": "1"}}, + {`{"a"?: <=x>}`, `{}`, true, nil}, + + {`[]`, `[]`, true, nil}, + {`[]`, `[1, 2, 3]`, true, nil}, + + {`[1: 1]`, `[]`, false, nil}, + {`[1: 1]`, `[1]`, false, nil}, + {`[1: 1]`, `[1, 2]`, false, nil}, + {`[1: 1]`, `[2, 1]`, true, nil}, + + {`[0?: 1]`, `[]`, true, nil}, + {`[0?: 1]`, `[1]`, true, nil}, + {`[0?: 1]`, `[2]`, false, nil}, + + {`[1: <=x>]`, `[2, 1]`, true, map[string]string{"x": "1"}}, + {`[0: <=x>, 1: <=y>]`, `[1, 2]`, true, map[string]string{"x": "1", "y": "2"}}, + {`[1: <=y>, 0: <=x>]`, `[1, 2]`, true, map[string]string{"x": "1", "y": "2"}}, + + {`[0?: <=x>]`, `[1]`, true, map[string]string{"x": "1"}}, + {`[0?: <=x>]`, `[]`, true, nil}, + + {`{"a": <=x>, "b": <x>}`, `{"a": 1, "b": 1}`, true, map[string]string{"x": "1"}}, + {`{"a": <=x>, "b": <x>}`, `{"a": 1, "b": 2}`, false, nil}, + + {`{"a": {"b": <=x>}, "c": <x>}`, `{"a": {"b": 1}, "c": 1}`, true, map[string]string{"x": "1"}}, + {`{"a": {"b": <=x>}, "c": <x>}`, `{"a": {"b": 1}, "c": 2}`, false, nil}, + + {`[0: <=x>, 1: <x>]`, `[1, 1]`, true, map[string]string{"x": "1"}}, + {`[0: <=x>, 1: <x>]`, `[1, 2]`, false, nil}, + + {`[0: [0: <=x>], 1: <x>]`, `[[1], 1]`, true, map[string]string{"x": "1"}}, + {`[0: [0: <=x>], 1: <x>]`, `[[1], 2]`, false, nil}, + } + + for _, test := range tests { + name := test.pattern + " -> " + test.input + t.Run(name, func(t *testing.T) { + p, err := pattern.Parse(test.pattern) + if err != nil { + t.Fatal(err) + } + + b, err := p.Interpret(test.input) + if test.shouldMatch != (err == nil) { + if test.shouldMatch { + t.Fatalf("%s failed to match: %s", name, err) + } else { + t.Fatalf("%s should not have matched", name) + } + + } + + for k := range test.output { + if _, exists := b[k]; !exists { + t.Errorf("output did not contain expected binding %s", k) + } + } + + for k := range b { + if _, exists := test.output[k]; !exists { + t.Errorf("output contained unexpected binding %s", k) + } + } + + for k, v1 := range 
test.output { + if v2, exists := b[k]; exists && v1 != v2 { + t.Errorf("output for %s did not match expected value: %s != %s", k, v2, v1) + } + } + }) + } +} diff --git a/pattern/reference.go b/pattern/reference.go new file mode 100644 index 0000000..db1c386 --- /dev/null +++ b/pattern/reference.go @@ -0,0 +1,47 @@ +package pattern + +import "fmt" + +func (r Reference) String() string { + s := "<" + + for i, identifier := range r { + s += string(identifier.Identifier) + + if identifier.Optional { + s += "?" + } else if i != len(r)-1 { + s += "." + } + } + + s += ">" + + return s +} + +func (r Reference) Validate(bindings map[string]bool) error { + bind := string(r[0].Identifier) + exists := bindings[bind] + bindings[bind] = true + + if !exists { + return fmt.Errorf("reference to %s before it was bound", r) + } + + return nil +} + +func (r Reference) Match(s string, bindings map[string]string) (map[string]string, error) { + bind := string(r[0].Identifier) + o, exists := bindings[bind] + if !exists { + return nil, fmt.Errorf("referenced binding %s was not available, was it matched in an optional section?", r) + } + + if s != o { + return nil, fmt.Errorf("reference to binding %s did not match expected value: %s != %s", r, s, o) + } + + return map[string]string{}, nil +} diff --git a/pattern/values.go b/pattern/values.go new file mode 100644 index 0000000..925ac37 --- /dev/null +++ b/pattern/values.go @@ -0,0 +1,138 @@ +package pattern + +import ( + "fmt" + "strconv" +) + +func (Null) Validate(map[string]bool) error { + return nil +} + +func (Null) Match(s string, _ map[string]string) (map[string]string, error) { + if s != "null" { + return nil, fmt.Errorf("expected null but matched %s", s) + } + + return map[string]string{}, nil +} + +func (Null) String() string { + return "null" +} + +func (i Number) Index() (int, error) { + return int(i), nil +} + +func (n Number) String() string { + return fmt.Sprint(float64(n)) +} + +func (Number) Validate(map[string]bool) error { 
+ return nil +} + +func (n Number) Match(s string, _ map[string]string) (map[string]string, error) { + m, err := strconv.ParseFloat(s, 64) + if err != nil { + return nil, fmt.Errorf("expected %s but matched value %s could not be interpreted as a number", n, s) + } + + if float64(n) != m { + return nil, fmt.Errorf("expected %s but matched value %s", n, s) + } + + return nil, nil +} + +func (k String) Key() (string, error) { + return string(k), nil +} + +func (s String) String() string { + return string(s) +} + +func (String) Validate(map[string]bool) error { + return nil +} + +func (t String) Match(s string, _ map[string]string) (map[string]string, error) { + if s[0] != '"' || s[len(s)-1] != '"' { + return nil, fmt.Errorf("expected \"%s\" but matched value \"%s\" could not be interpreted as a string", t, s) + } + + if string(t) != s[1:len(s)-1] { + return nil, fmt.Errorf("expected \"%s\" but matched value %s", t, s) + } + + return nil, nil +} + +func (b Boolean) String() string { + if b { + return "true" + } else { + return "false" + } +} + +func (Boolean) Validate(map[string]bool) error { + return nil +} + +func (b Boolean) Match(s string, _ map[string]string) (map[string]string, error) { + x, err := strconv.ParseBool(s) + if err != nil { + return nil, fmt.Errorf("expected %s but matched value %s could not be interpreted as a boolean", b, s) + } + + if bool(b) != x { + return nil, fmt.Errorf("expected %s but matched value %s", b, s) + } + + return nil, nil +} + +func (b BoundLiteral) Match(s string, old_bindings map[string]string) (map[string]string, error) { + new_bindings := make(map[string]string) + + a_matched, err := b.Assignment.Match(s, old_bindings) + if err != nil { + return nil, err + } + + for k, v := range a_matched { + new_bindings[k] = v + } + + b_matched, err := b.Binding.Match(s, old_bindings) + if err != nil { + return nil, err + } + + for k, v := range b_matched { + new_bindings[k] = v + } + + return new_bindings, nil +} + +func (b BoundLiteral) 
Validate(bindings map[string]bool) error { + bind := string(b.Binding) + if err := b.Assignment.Validate(bindings); err != nil { + if bindings[bind] { + return fmt.Errorf("illegal self reference to %s", b.Binding) + } else { + return err + } + } + + bindings[bind] = true + return nil +} + +func (b BoundLiteral) String() string { + return b.Binding.String() + b.Assignment.String() +}