implement symbol scanner (buggy still)
bobertlo committed Nov 23, 2024
1 parent 949368f commit 0da8eb9
Showing 5 changed files with 228 additions and 32 deletions.
14 changes: 13 additions & 1 deletion compile.go
@@ -446,7 +446,19 @@ func (c *compiler) compile() (WarriorData, error) {

func CompileWarrior(r io.Reader, config SimulatorConfig) (WarriorData, error) {
lexer := newLexer(r)
parser := newParser(lexer)
tokens, err := lexer.Tokens()
if err != nil {
return WarriorData{}, err
}

scanner := newSymbolScanner(newBufTokenReader(tokens))
symbols, err := scanner.ScanInput()
if err != nil {
return WarriorData{}, err
}
fmt.Println(symbols)

parser := newParser(newBufTokenReader(tokens))
sourceLines, metadata, err := parser.parse()
if err != nil {
return WarriorData{}, err
31 changes: 0 additions & 31 deletions lex.go
@@ -22,37 +22,6 @@ type lexer struct {
tokens chan token
}

// butTokenReader implements the same interface as a streaming parser to let
// us cache and reuse the token stream instead of making multiple passes with
// the lexer
type bufTokenReader struct {
tokens []token
i int
}

func newBufTokenReader(tokens []token) *bufTokenReader {
return &bufTokenReader{tokens: tokens}
}

func (r *bufTokenReader) NextToken() (token, error) {
if r.i >= len(r.tokens) {
return token{}, fmt.Errorf("no more tokens")
}
next := r.tokens[r.i]
r.i++
return next, nil
}

func (r *bufTokenReader) Tokens() ([]token, error) {
if r.i >= len(r.tokens) {
return nil, fmt.Errorf("no more tokens")
}
subslice := r.tokens[r.i:]
ret := make([]token, len(subslice))
copy(subslice, ret)
return ret, nil
}

type lexStateFn func(l *lexer) lexStateFn

func newLexer(r io.Reader) *lexer {
141 changes: 141 additions & 0 deletions symbol_scanner.go
@@ -0,0 +1,141 @@
package gmars

import (
"fmt"
"strings"
)

type symbolScanner struct {
lex tokenReader

nextToken token
atEOF bool
valBuf []token
labelBuf []string
err error

symbols map[string][]token
}

type scanStateFn func(p *symbolScanner) scanStateFn

func newSymbolScanner(lex tokenReader) *symbolScanner {
pre := &symbolScanner{
lex: lex,
symbols: make(map[string][]token),
}

pre.next()

return pre
}

func (p *symbolScanner) next() token {
if p.atEOF {
return token{typ: tokEOF}
}
tok, err := p.lex.NextToken()
if err != nil {
p.atEOF = true
return token{tokError, fmt.Sprintf("%s\n", err)}
}
if tok.typ == tokEOF || tok.typ == tokError {
p.atEOF = true
}
retTok := p.nextToken
p.nextToken = tok
return retTok
}

// ScanInput runs the scanner state machine over the input and returns the collected symbol table
func (p *symbolScanner) ScanInput() (map[string][]token, error) {
for state := preLine; state != nil; {
state = state(p)
}
if p.err != nil {
return nil, p.err
}
return p.symbols, nil
}

func (p *symbolScanner) consume(nextState scanStateFn) scanStateFn {
p.next()
if p.nextToken.typ == tokEOF {
return nil
}
return nextState
}

// run at start of each line
// on text: preLabels
// on other: preConsumeLine
func preLine(p *symbolScanner) scanStateFn {
switch p.nextToken.typ {
case tokText:
p.labelBuf = make([]string, 0)
return preLabels
default:
return preConsumeLine
}
}

// text "equ": preScanValue
// text op or other pseudo-op: preConsumeLine
// text default: buffer label, preLabels
// comment/newline: preLabels
// anything else: preConsumeLine
func preLabels(p *symbolScanner) scanStateFn {
switch p.nextToken.typ {
case tokText:
if p.nextToken.IsPseudoOp() {
if strings.ToLower(p.nextToken.val) == "equ" {
p.valBuf = make([]token, 0)
return p.consume(preScanValue)
} else {
return preConsumeLine
}
} else if p.nextToken.IsOp() {
return preConsumeLine
}
p.labelBuf = append(p.labelBuf, p.nextToken.val)
return p.consume(preLabels)
case tokComment:
fallthrough
case tokNewline:
return p.consume(preLabels)
case tokEOF:
return nil
default:
return preConsumeLine
}
}

func preConsumeLine(p *symbolScanner) scanStateFn {
switch p.nextToken.typ {
case tokNewline:
return p.consume(preLine)
case tokError:
return nil
case tokEOF:
return nil
default:
return p.consume(preConsumeLine)
}
}

func preScanValue(p *symbolScanner) scanStateFn {
for p.nextToken.typ != tokNewline && p.nextToken.typ != tokEOF {
p.valBuf = append(p.valBuf, p.nextToken)
p.next()
}
for _, label := range p.labelBuf {
_, ok := p.symbols[label]
if ok {
p.err = fmt.Errorf("symbol '%s' redefined", label)
return nil
}
p.symbols[label] = p.valBuf
}
p.valBuf = make([]token, 0)
p.labelBuf = make([]string, 0)
return p.consume(preLine)
}
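
An editor's sketch (not part of the commit) of how the states above walk a single equ line, assuming the input "step equ 4":

preLine: the lookahead is tokText "step", so labelBuf is reset and control moves to preLabels.
preLabels: "step" is neither an op nor a pseudo-op, so it is appended to labelBuf and the state repeats.
preLabels: "equ" is a pseudo-op, so valBuf is reset and control moves to preScanValue.
preScanValue: buffers tokNumber "4" up to the newline, records symbols["step"] = valBuf for every buffered label, then returns to preLine.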
40 changes: 40 additions & 0 deletions symbol_scanner_test.go
@@ -0,0 +1,40 @@
package gmars

import (
"strings"
"testing"

"github.com/stretchr/testify/require"
)

type symbolScannerTestCase struct {
input string
output map[string][]token
}

func runSymbolScannerTests(t *testing.T, cases []symbolScannerTestCase) {
for _, test := range cases {
tokens, err := LexInput(strings.NewReader(test.input))
require.NoError(t, err)
require.NotNil(t, tokens)

scanner := newSymbolScanner(newBufTokenReader(tokens))
symbols, err := scanner.ScanInput()
require.NoError(t, err)
require.NotNil(t, symbols)

require.Equal(t, test.output, symbols)
}
}

func TestSymbolScanner(t *testing.T) {
tests := []symbolScannerTestCase{
{
input: "test equ 2\ndat 0, test\n",
output: map[string][]token{
"test": {{tokNumber, "2"}},
},
},
}
runSymbolScannerTests(t, tests)
}
34 changes: 34 additions & 0 deletions tokenbuf.go
@@ -0,0 +1,34 @@
package gmars

import "fmt"

// bufTokenReader implements the tokenReader interface over a cached token
// slice, letting us reuse the token stream instead of making multiple passes
// with the lexer
type bufTokenReader struct {
tokens []token
i int
}

func newBufTokenReader(tokens []token) *bufTokenReader {
return &bufTokenReader{tokens: tokens}
}

func (r *bufTokenReader) NextToken() (token, error) {
if r.i >= len(r.tokens) {
return token{}, fmt.Errorf("no more tokens")
}
next := r.tokens[r.i]
r.i++
return next, nil
}

func (r *bufTokenReader) Tokens() ([]token, error) {
if r.i >= len(r.tokens) {
return nil, fmt.Errorf("no more tokens")
}
subslice := r.tokens[r.i:]
ret := make([]token, len(subslice))
copy(ret, subslice)
return ret, nil
}
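
Each pass in CompileWarrior wraps the same cached slice in its own newBufTokenReader, because NextToken advances the reader's private index rather than the shared slice. A minimal illustration (editor's sketch, not part of the commit, assuming tokens is the []token returned by the lexer):

r1 := newBufTokenReader(tokens)
r2 := newBufTokenReader(tokens)
t1, _ := r1.NextToken() // advances r1 only
t2, _ := r2.NextToken() // r2 independently yields the same first token
fmt.Println(t1 == t2)   // true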
