Skip to content

Commit

Permalink
Rewrite block string parser to fix bugs in line number and next token handling.
Browse files Browse the repository at this point in the history
  • Loading branch information
Tronic committed Sep 7, 2020
1 parent 1839c6f commit e2a35db
Showing 1 changed file with 42 additions and 40 deletions.
82 changes: 42 additions & 40 deletions compiler/lexer.nim
Original file line number Diff line number Diff line change
Expand Up @@ -816,55 +816,57 @@ proc getString(L: var Lexer, tok: var Token, mode: StringMode) =
inc(pos)
elif L.buf[pos] == ':' and L.buf[pos + 1] in {CR, LF}:
# string block literal
# TODO: Allow end-of-line comment to exist on starting line?
pos = handleCRLF(L, pos + 1)
if mode != normal: tok.tokType = tkRStrLit
else: tok.tokType = tkStrLit
L.bufpos = pos + 1
tok.tokType = if mode == normal: tkStrLit
else: tkRStrLit
let indent = L.currLineIndent + 2
var needIndent = indent
var emptyLines = 0
while true:
var c = L.buf[pos]
# skip indent and terminate if block ends
if needIndent > 0:
if c == ' ':
dec(needIndent)
# skip indent and/or empty lines without moving lexer
var needIndent = indent
var emptyLines = -1 # account for previous LF still in buffer
var pos = L.bufpos
while needIndent > 0:
var c = L.buf[pos]
if c in {' ', CR, LF}:
if c == ' ':
dec(needIndent)
if c == LF:
inc(emptyLines)
needIndent = indent
inc(pos)
elif c in {CR, LF}:
inc(emptyLines)
pos = handleCRLF(L, pos)
needIndent = indent
elif c == ')':
add(tok.literal, '\n'.repeat(emptyLines))
break
else:
# end of block found -> cancel lookahead
pos = L.bufpos
if c != ')': emptyLines = 0
break
continue
# string block content
add(tok.literal, '\n'.repeat(emptyLines))
emptyLines = 0
if c in {CR, LF, nimlexbase.EndOfFile}:
add(tok.literal, "\n")
pos = handleCRLF(L, pos)
needIndent = indent
continue
if (c == '\\') and mode == normal:
L.bufpos = pos
if L.buf[pos + 1] in {CR, LF, nimlexbase.EndOfFile}:
if emptyLines > 0:
add(tok.literal, '\n'.repeat(emptyLines))
# fast-forward lexer to current position
while L.bufpos < pos:
if L.buf[L.bufpos] in {CR, LF}:
L.bufpos = handleCRLF(L, L.bufpos)
else:
inc(L.bufpos)
pos = handleCRLF(L, pos + 1)
needIndent = indent
continue
# EXIT if end of block was reached
if needIndent > 0: break
# parse a line of string, break before EOL
while true:
var c = L.buf[L.bufpos]
if c in {CR, LF, nimlexbase.EndOfFile}:
add(tok.literal, "\n")
break
if (c == '\\') and mode == normal:
if L.buf[L.bufpos + 1] in {CR, LF, nimlexbase.EndOfFile}:
inc(L.bufpos)
break
else:
getEscapedChar(L, tok)
else:
getEscapedChar(L, tok)
pos = L.bufpos
else:
add(tok.literal, c)
inc(pos)
L.bufpos = pos
add(tok.literal, c)
inc(L.bufpos)
if tok.literal == "":
lexMessage(L, errGenerated, "string block literal indented by two spaces expected")
tokenEndIgnore(tok, pos)
tokenEndIgnore(tok, L.bufpos)
else:
# ordinary string literal
if mode != normal: tok.tokType = tkRStrLit
Expand Down

0 comments on commit e2a35db

Please sign in to comment.