Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AVM: Add Semicolon Parsing #4363

Merged
merged 18 commits into from
Aug 11, 2022
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
159 changes: 92 additions & 67 deletions data/transactions/logic/assembler.go
Original file line number Diff line number Diff line change
Expand Up @@ -545,7 +545,7 @@ func asmPushBytes(ops *OpStream, spec *OpSpec, args []string) error {
return nil
}

func base32DecdodeAnyPadding(x string) (val []byte, err error) {
func base32DecodeAnyPadding(x string) (val []byte, err error) {
val, err = base32.StdEncoding.WithPadding(base32.NoPadding).DecodeString(x)
if err != nil {
// try again with standard padding
Expand All @@ -567,7 +567,7 @@ func parseBinaryArgs(args []string) (val []byte, consumed int, err error) {
err = errors.New("byte base32 arg lacks close paren")
return
}
val, err = base32DecdodeAnyPadding(arg[open+1 : close])
val, err = base32DecodeAnyPadding(arg[open+1 : close])
if err != nil {
return
}
Expand Down Expand Up @@ -595,7 +595,7 @@ func parseBinaryArgs(args []string) (val []byte, consumed int, err error) {
err = fmt.Errorf("need literal after 'byte %s'", arg)
return
}
val, err = base32DecdodeAnyPadding(args[1])
val, err = base32DecodeAnyPadding(args[1])
if err != nil {
return
}
Expand Down Expand Up @@ -1399,25 +1399,26 @@ func typecheck(expected, got StackType) bool {
return expected == got
}

var spaces = [256]uint8{'\t': 1, ' ': 1}
// semi-colon is quite space-like, so include it
var spaces = [256]bool{'\t': true, ' ': true, ';': true}

func fieldsFromLine(line string) []string {
var fields []string

i := 0
for i < len(line) && spaces[line[i]] != 0 {
for i < len(line) && spaces[line[i]] {
i++
}

start := i
inString := false
inBase64 := false
inString := false // tracked to allow spaces and comments inside
inBase64 := false // tracked to allow '//' inside
for i < len(line) {
if spaces[line[i]] == 0 { // if not space
if !spaces[line[i]] { // if not space
switch line[i] {
case '"': // is a string literal?
if !inString {
if i == 0 || i > 0 && spaces[line[i-1]] != 0 {
if i == 0 || i > 0 && spaces[line[i-1]] {
inString = true
}
} else {
Expand Down Expand Up @@ -1446,19 +1447,29 @@ func fieldsFromLine(line string) []string {
i++
continue
}

// we've hit a space, end last token unless inString

if !inString {
field := line[start:i]
fields = append(fields, field)
if field == "base64" || field == "b64" {
inBase64 = true
} else if inBase64 {
if line[i] == ';' {
fields = append(fields, ";")
}
if inBase64 {
inBase64 = false
} else if field == "base64" || field == "b64" {
inBase64 = true
}
}
i++

// gobble up consecutive whitespace (but notice semis)
if !inString {
for i < len(line) && spaces[line[i]] != 0 {
for i < len(line) && spaces[line[i]] {
if line[i] == ';' {
fields = append(fields, ";")
}
i++
}
start = i
Expand Down Expand Up @@ -1531,6 +1542,17 @@ func (ops *OpStream) trackStack(args StackTypes, returns StackTypes, instruction
}
}

// processFields splits fields at the first ";" token. The tokens before
// that semi-colon are returned as current and the tokens after it as rest;
// the semi-colon itself is dropped. If fields contains no ";", all of fields
// is returned as current and rest is nil. If the first ";" is the final
// token, rest is an empty (but non-nil) slice.
func processFields(fields []string) (current, rest []string) {
	// Advance to the first semi-colon token, or to the end if there is none.
	idx := 0
	for idx < len(fields) && fields[idx] != ";" {
		idx++
	}
	if idx == len(fields) {
		return fields, nil
	}
	return fields[:idx], fields[idx+1:]
}
Copy link
Contributor

@tzaffi tzaffi Aug 11, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the fact that splitTokens() behaves differently for the following 2 examples problematic? (the 2nd output isn't consistently nil):

	tokens := []string{"hello", "there"}
	splitTokens(tokens)

	tokens = []string{"hello", "there", ";"}
	splitTokens(tokens)

See the playground

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe the difference will turn out to be important in the next step of the plan. Once tokens can expand into other tokens, it will matter whether something expands to include the semi or not. So I think the distinction is useful. (But in the meantime, the distinction won't matter because the next tokens are tested with len.)


// assemble reads text from an input and accumulates the program
func (ops *OpStream) assemble(text string) error {
fin := strings.NewReader(text)
Expand All @@ -1541,73 +1563,77 @@ func (ops *OpStream) assemble(text string) error {
for scanner.Scan() {
ops.sourceLine++
line := scanner.Text()
line = strings.TrimSpace(line)
if len(line) == 0 {
ops.trace("%3d: 0 line\n", ops.sourceLine)
continue
}
if strings.HasPrefix(line, "//") {
ops.trace("%3d: // line\n", ops.sourceLine)
continue
}
if strings.HasPrefix(line, "#pragma") {
ops.trace("%3d: #pragma line\n", ops.sourceLine)
ops.pragma(line)
ops.trace("%3d: comment\n", ops.sourceLine)
continue
}
fields := fieldsFromLine(line)
if len(fields) == 0 {
ops.trace("%3d: no fields\n", ops.sourceLine)
continue
}
// we're about to begin processing opcodes, so settle the Version
if ops.Version == assemblerNoVersion {
ops.Version = AssemblerDefaultVersion
}
if ops.versionedPseudoOps == nil {
ops.versionedPseudoOps = prepareVersionedPseudoTable(ops.Version)
}
opstring := fields[0]
if opstring[len(opstring)-1] == ':' {
ops.createLabel(opstring[:len(opstring)-1])
fields = fields[1:]
if len(fields) == 0 {
ops.trace("%3d: label only\n", ops.sourceLine)
for current, next := processFields(fields); len(current) > 0 || len(next) > 0; current, next = processFields(next) {
if len(current) == 0 {
continue
}
opstring = fields[0]
}
spec, expandedName, ok := getSpec(ops, opstring, fields[1:])
if ok {
ops.trace("%3d: %s\t", ops.sourceLine, opstring)
ops.recordSourceLine()
if spec.Modes == modeApp {
ops.HasStatefulOps = true
opstring := current[0]
if opstring == "#pragma" {
ops.trace("%3d: #pragma line\n", ops.sourceLine)
// pragma gets the rest of the tokens
ops.pragma(append(current, next...))
break
}
args, returns := spec.Arg.Types, spec.Return.Types
if spec.refine != nil {
nargs, nreturns := spec.refine(&ops.known, fields[1:])
if nargs != nil {
args = nargs
}
if nreturns != nil {
returns = nreturns
}
// we're about to begin processing opcodes, so settle the Version
if ops.Version == assemblerNoVersion {
ops.Version = AssemblerDefaultVersion
}
ops.trackStack(args, returns, append([]string{expandedName}, fields[1:]...))
spec.asm(ops, &spec, fields[1:])
if spec.deadens() { // An unconditional branch deadens the following code
ops.known.deaden()
if ops.versionedPseudoOps == nil {
ops.versionedPseudoOps = prepareVersionedPseudoTable(ops.Version)
}
if spec.Name == "callsub" {
// since retsub comes back to the callsub, it is an entry point like a label
ops.known.label()
if opstring[len(opstring)-1] == ':' {
ops.createLabel(opstring[:len(opstring)-1])
current = current[1:]
if len(current) == 0 {
ops.trace("%3d: label only\n", ops.sourceLine)
continue
}
opstring = current[0]
}
spec, expandedName, ok := getSpec(ops, opstring, current[1:])
if ok {
ops.trace("%3d: %s\t", ops.sourceLine, opstring)
ops.recordSourceLine()
if spec.Modes == modeApp {
ops.HasStatefulOps = true
}
args, returns := spec.Arg.Types, spec.Return.Types
if spec.refine != nil {
nargs, nreturns := spec.refine(&ops.known, current[1:])
if nargs != nil {
args = nargs
}
if nreturns != nil {
returns = nreturns
}
}
ops.trackStack(args, returns, append([]string{expandedName}, current[1:]...))
spec.asm(ops, &spec, current[1:])
if spec.deadens() { // An unconditional branch deadens the following code
ops.known.deaden()
}
if spec.Name == "callsub" {
// since retsub comes back to the callsub, it is an entry point like a label
ops.known.label()
}
}
ops.trace("\n")
continue
}
}

if err := scanner.Err(); err != nil {
if errors.Is(err, bufio.ErrTooLong) {
err = errors.New("line too long")
}
ops.error(err)
}

// backward compatibility: do not allow jumps behind last instruction in v1
if ops.Version <= 1 {
for label, dest := range ops.labels {
Expand Down Expand Up @@ -1635,8 +1661,7 @@ func (ops *OpStream) assemble(text string) error {
return nil
}

func (ops *OpStream) pragma(line string) error {
fields := strings.Split(line, " ")
func (ops *OpStream) pragma(fields []string) error {
if fields[0] != "#pragma" {
return ops.errorf("invalid syntax: %s", fields[0])
}
Expand Down
Loading