Skip to content

Commit

Permalink
*: refactor the yaccgo
Browse files Browse the repository at this point in the history
  • Loading branch information
acekingke committed Nov 21, 2022
1 parent 1687bcc commit c8d7268
Show file tree
Hide file tree
Showing 5 changed files with 237 additions and 23 deletions.
45 changes: 34 additions & 11 deletions Parser/Lex.go
Original file line number Diff line number Diff line change
Expand Up @@ -239,9 +239,9 @@ func ActionState(l *lexer) stateFn {
case unicode.IsDigit(r):
l.acceptRun("0123456789")
l.emit(ActionN)
case l.acceptWord("accept"): //$accept
case l.acceptOnlyAlphaWord("accept"): //$accept
l.emit(ActionAccept)
case l.acceptWord("end"): //$end
case l.acceptOnlyAlphaWord("end"): //$end
l.emit(ActionEnd)
default:
l.error("Action lexer error")
Expand All @@ -265,31 +265,31 @@ func DirectiveState(l *lexer) stateFn {
}

func DirectiveOtherState(l *lexer) stateFn {
if l.acceptWord("type") {
if l.acceptOnlyAlphaWord("type") {
l.emit(TypeDirective)
}
if l.acceptWord("token") {
if l.acceptOnlyAlphaWord("token") {
l.emit(TokenDirective)
}
if l.acceptWord("union") {
if l.acceptOnlyAlphaWord("union") {
return DirectiveUnionState
}
if l.acceptWord("left") {
if l.acceptOnlyAlphaWord("left") {
l.emit(LeftAssoc)
}
if l.acceptWord("right") {
if l.acceptOnlyAlphaWord("right") {
l.emit(RightAssoc)
}
if l.acceptWord("nonassoc") {
if l.acceptOnlyAlphaWord("nonassoc") {
l.emit(NoneAssoc)
}
if l.acceptWord("prec") {
if l.acceptOnlyAlphaWord("prec") {
l.emit(PrecDirective)
}
if l.acceptWord("precedence") {
if l.acceptOnlyAlphaWord("precedence") {
l.emit(Precedence)
}
if l.acceptWord("start") {
if l.acceptOnlyAlphaWord("start") {
l.emit(StartDirective)
}
return rootState
Expand Down Expand Up @@ -455,6 +455,29 @@ func (l *lexer) acceptRun(valid string) {
l.backup()
}

// acceptOnlyAlphaWord consumes word from the input, but only when it occurs
// as a whole word, i.e. it is not immediately followed by another identifier
// character. Leading spaces (U+0020 only) are skipped before matching.
//
// This differs from acceptWord, which accepts any prefix match; the
// whole-word check is what allows a short directive such as "prec" to be
// tried before "precedence" without mis-lexing the longer one.
//
// On failure the lexer position (end, loc, prev) is rolled back to the state
// before the call — including before any skipped spaces — and false is
// returned. Returns true iff word was consumed.
func (l *lexer) acceptOnlyAlphaWord(word string) bool {
	pos, loc, prev := l.end, l.loc, l.prev

	// Skip spaces (U+0020) if any.
	for r := l.peek(); r == ' '; r = l.peek() {
		l.next()
	}

	for _, ch := range word {
		if l.next() != ch {
			l.end, l.loc, l.prev = pos, loc, prev
			return false
		}
	}
	// Reject a partial match: if the next rune is a letter, digit, or
	// underscore we only matched a prefix of a longer identifier (e.g.
	// "start1" must not match "start"), so roll back and report no match.
	// Previously only letters were rejected here, which let inputs like
	// "%start1" be falsely lexed as "%start" followed by "1".
	if r := l.peek(); unicode.IsLetter(r) || unicode.IsDigit(r) || r == '_' {
		l.end, l.loc, l.prev = pos, loc, prev
		return false
	}

	return true
}

func (l *lexer) acceptWord(word string) bool {
pos, loc, prev := l.end, l.loc, l.prev

Expand Down
80 changes: 71 additions & 9 deletions Parser/Parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ type parser struct {
peekCount int
pos int
err error

// map Def
TokenDefMap map[string]bool
}

type Node interface {
Expand Down Expand Up @@ -130,11 +133,12 @@ func Parse(input string) (*RootNode, error) {
lex: Lex(input),
pos: 0,
}
p.TokenDefMap = make(map[string]bool)
var nodeDeclare Node
if nodeDeclare = p.parseDeclare(); nodeDeclare == nil {
return nil, fmt.Errorf("do not has declare %s", p.err)
}

decl, _ := nodeDeclare.(*DeclareNode)
if !p.current.Is(Section) {
return nil, fmt.Errorf(
fmt.Sprintf("parser error! %s", p.current.Value),
Expand All @@ -143,14 +147,14 @@ func Parse(input string) (*RootNode, error) {
RuDlist := make([]RuleDef, 0)
p.next() // get the first identify
for {
if ruleslice := p.parseRule(); ruleslice == nil {
if ruleslice := p.parseRule(&decl.TokenDefList); ruleslice == nil {
break
} else {
RuDlist = append(RuDlist, ruleslice...)
}
}

if !p.current.Is(Section) {
if !p.current.Is(Section) && !p.current.Is(EOF) {
return nil, fmt.Errorf(fmt.Sprintf("parser err :%s", p.current.Value))
}
restcode := p.lex.input[p.current.EndAt:]
Expand Down Expand Up @@ -250,6 +254,7 @@ func (p *parser) parseTokendef() *TokenDef {
p.backup()
}
id.Value = value
p.TokenDefMap[id.Name] = true
Tokdef.IdentifyList = append(Tokdef.IdentifyList, id)
} else if p.current.Is(Charater) {
id := Idendity{
Expand All @@ -260,6 +265,7 @@ func (p *parser) parseTokendef() *TokenDef {
IDTyp: TERMID,
Alias: p.current.Value,
}
p.TokenDefMap[id.Name] = true
Tokdef.IdentifyList = append(Tokdef.IdentifyList, id)
} else {
break
Expand All @@ -270,8 +276,16 @@ func (p *parser) parseTokendef() *TokenDef {
return &Tokdef
}

func (p *parser) parsePrecList() []PrecDef {
// Parsed the same way as a %token declaration: the symbols (and optional <type> tag) follow the directive.
/*
%left symbols…
%left <type> symbols…
*/
func (p *parser) parsePrecList(Tklist *[]TokenDef) []PrecDef {
var assocTy PrecAssocType
var Tokdef TokenDef
var IdName string
res := make([]PrecDef, 0)
if p.current.Is(LeftAssoc) {
Expand All @@ -282,13 +296,38 @@ func (p *parser) parsePrecList() []PrecDef {
assocTy = NonAssocType
}

Tag := ""
p.next()
// match <
if p.current.Is(LeftAngleBracket) {
// get Tag
p.next()
Tag = p.current.Value
p.next()
p.expect(RightAngleBracket) // match >
}
p.backup()
for {
p.next()
if p.current.Is(Identifier) || p.current.Is(Charater) {
// make loop get id or alias
IdName = p.current.Value
idvalue := 0
if p.current.Is(Charater) {
IdName = genTempName(IdName)
idvalue = int(p.current.Value[0])
}
if !p.TokenDefMap[IdName] {
id := Idendity{
Tag: Tag,
// NOTE(review): unnamed (character-literal) tokens presumably need special handling here — confirm.
Name: IdName,
Value: idvalue,
IDTyp: TERMID,
Alias: "",
}
p.TokenDefMap[IdName] = true
Tokdef.IdentifyList = append(Tokdef.IdentifyList, id)
}
node := PrecDef{
IdName: IdName,
Expand All @@ -300,6 +339,9 @@ func (p *parser) parsePrecList() []PrecDef {
break
}
}
if len(Tokdef.IdentifyList) != 0 {
*Tklist = append(*Tklist, Tokdef)
}
return res
}

Expand Down Expand Up @@ -339,7 +381,9 @@ func (p *parser) parseTypeList() []TypeDef {
return TypedefList
}

// startsymbol
// startsymbol indicates which nonterminal the rules start from:
// `%start cmds` means cmds is the start symbol;
// otherwise the start symbol defaults to `start`.
func (p *parser) parseStartSymbol() string {
p.next()
if p.current.Is(Identifier) {
Expand All @@ -357,7 +401,7 @@ func (p *parser) parseDeclare() Node {
var TokDefList []TokenDef
var PreDefList [][]PrecDef
var TypeDefList []TypeDef
var StartSym string
var StartSym string = "start"
p.next()
for !(p.current.Is(EOF) || p.current.Is(Section)) {
if p.current.Is(tokenError) {
Expand All @@ -381,7 +425,7 @@ func (p *parser) parseDeclare() Node {
p.current.Is(NoneAssoc) ||
// precDirective is just used to rules
p.current.Is(Precedence) {
PreDefList = append(PreDefList, p.parsePrecList())
PreDefList = append(PreDefList, p.parsePrecList(&TokDefList))
//Do not need call p.next
continue
}
Expand Down Expand Up @@ -415,8 +459,9 @@ func (p *parser) parseDeclare() Node {
ID RuleDefine {id/char %prec terminal-symbol |ActionQuote}*
| RuleOR {id/char |ActionQuote}*
*/
func (p *parser) parseRule() []RuleDef {
func (p *parser) parseRule(toklst *[]TokenDef) []RuleDef {
var Leftpart string
var Tokdef TokenDef
if p.current.Is(Identifier) {
Leftpart = p.current.Value
p.next()
Expand Down Expand Up @@ -449,6 +494,18 @@ func (p *parser) parseRule() []RuleDef {
ElemType: RightSyType,
Element: genTempName(p.current.Value),
})
if !p.TokenDefMap[genTempName(p.current.Value)] {
id := Idendity{
Tag: "",
// NOTE(review): unnamed (character-literal) tokens presumably need special handling here — confirm.
Name: genTempName(p.current.Value),
Value: int(p.current.Value[0]),
IDTyp: TERMID,
Alias: "",
}
p.TokenDefMap[genTempName(p.current.Value)] = true
Tokdef.IdentifyList = append(Tokdef.IdentifyList, id)
}
case Identifier:
rightpart = append(rightpart, RightSymOrAction{
ElemType: RightSyType,
Expand All @@ -475,10 +532,15 @@ func (p *parser) parseRule() []RuleDef {
}
default:
res = append(res, rule)
return res
goto out

}
rule.RightPart = rightpart
p.next()
}
out:
if len(Tokdef.IdentifyList) != 0 {
*toklst = append(*toklst, Tokdef)
}
return res
}
99 changes: 99 additions & 0 deletions Parser/Parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,3 +132,102 @@ expr3:
root.LALR1 = lalr
}
}

// TestParser2 runs a small goyacc-style grammar — one with a tagged %left,
// a plain %left, and an explicit %start — through Parse, then walks the
// resulting tree and builds its LALR(1) tables.
func TestParser2(t *testing.T) {
	grammar :=
		`// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This is an example of a goyacc program.
// To build it:
// goyacc -p "expr" expr.y (produces y.go)
// go build -o expr y.go
// expr
// > <type an expression>
%{
package main
import (
"bufio"
"bytes"
"fmt"
"io"
"log"
"math/big"
"os"
"unicode/utf8"
)
%}
%left <tga> A B
%left C
%start s
%%
s : A
`
	tree, err := Parse(grammar)
	if err != nil {
		t.Error(err)
		return
	}
	// Walk the parse tree and construct the LALR(1) automaton from it.
	var node Node = tree
	walker := DoWalker(&node, &RootVistor{})
	tables := walker.BuildLALR1()
	fmt.Println(tables)
	visitor := walker.VistorNode.(*RootVistor)
	visitor.LALR1 = tables
}

// TestParser3 exercises a fuller grammar: %union, tagged %token and %type
// declarations, character-literal operators with %left precedence, and a
// %prec override — then builds the LALR(1) tables from the parsed tree.
func TestParser3(t *testing.T) {
	grammar := `
%{
package main
%}
%union{
String string
Expr expr
}
%token<String> IDENTIFIER
%token<String> NUMBER 100
%type <Expr> expr assignment
%left '+' '-'
%left '*' '/'
%%
start: expr {yylex.(*interpreter).parseResult = &astRoot{$1}}
| assignment {yylex.(*interpreter).parseResult = $1}
expr:
NUMBER {$$ = &number{$1} }
| IDENTIFIER { $$ = &variable{$1}}
| expr '+' expr { $$ = &binaryExpr{Op: '+', lhs: $1, rhs: $3} }
| expr '-' expr { $$ = &binaryExpr{Op: '-', lhs: $1, rhs: $3} }
| expr '*' expr { $$ = &binaryExpr{Op: '*', lhs: $1, rhs: $3} }
| expr '/' expr { $$ = &binaryExpr{Op: '/', lhs: $1, rhs: $3} }
| '(' expr ')' { $$ = &parenExpr{$2}}
| '-' expr %prec '*' { $$ = &unaryExpr{$2} }
assignment:
IDENTIFIER '=' expr {$$ = &assignment{$1, $3}}
%%
`
	tree, err := Parse(grammar)
	if err != nil {
		t.Error(err)
		return
	}
	// Walk the parse tree and construct the LALR(1) automaton from it.
	var node Node = tree
	walker := DoWalker(&node, &RootVistor{})
	tables := walker.BuildLALR1()
	fmt.Println(tables)
	visitor := walker.VistorNode.(*RootVistor)
	visitor.LALR1 = tables
}
Loading

0 comments on commit c8d7268

Please sign in to comment.