From 421f25f52c197e27d81d0a281bd4553cacace72e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=2E=20K=C3=A4rkk=C3=A4inen?= Date: Sun, 18 Aug 2019 14:10:06 +0300 Subject: [PATCH 1/3] Implement string block literals. let str = ": formatted text is possible echo str echo( r": def foo(): """Generated Python function without any escaping""" print("".join(["foo", "bar"])) print("Hello\nWorld!") ) echo ":
\
  lines split only in source code, \
  no newlines in output\
  
\ --- compiler/lexer.nim | 49 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/compiler/lexer.nim b/compiler/lexer.nim index ff433928c9fab..bac32b95fc13e 100644 --- a/compiler/lexer.nim +++ b/compiler/lexer.nim @@ -814,6 +814,55 @@ proc getString(L: var Lexer, tok: var Token, mode: StringMode) = else: tok.literal.add(L.buf[pos]) inc(pos) + elif L.buf[pos] == ':' and L.buf[pos + 1] in {CR, LF}: + # string block literal + # TODO: Allow end-of-line comment to exist on starting line? + pos = handleCRLF(L, pos + 1) + if mode != normal: tok.tokType = tkRStrLit + else: tok.tokType = tkStrLit + let indent = L.currLineIndent + 2 + var needIndent = indent + var emptyLines = 0 + while true: + var c = L.buf[pos] + # skip indent and terminate if block ends + if needIndent > 0: + if c == ' ': + dec(needIndent) + inc(pos) + elif c in {CR, LF}: + inc(emptyLines) + pos = handleCRLF(L, pos) + needIndent = indent + else: + break + continue + # string block content + while emptyLines > 0: + add(tok.literal, "\n") + dec(emptyLines) + if c in {CR, LF, nimlexbase.EndOfFile}: + add(tok.literal, "\n") + pos = handleCRLF(L, pos) + needIndent = indent + continue + if (c == '\\') and mode == normal: + L.bufpos = pos + if L.buf[pos + 1] in {CR, LF, nimlexbase.EndOfFile}: + inc(L.bufpos) + pos = handleCRLF(L, pos + 1) + needIndent = indent + continue + else: + getEscapedChar(L, tok) + pos = L.bufpos + else: + add(tok.literal, c) + inc(pos) + L.bufpos = pos + if tok.literal == "": + lexMessage(L, errGenerated, "string block literal indented by two spaces expected") + tokenEndIgnore(tok, pos) else: # ordinary string literal if mode != normal: tok.tokType = tkRStrLit From 1839c6fcddbbcb8c3751d8afae2a50a235c49744 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=2E=20K=C3=A4rkk=C3=A4inen?= Date: Wed, 21 Aug 2019 13:54:19 +0300 Subject: [PATCH 2/3] Parenthesized block string literals may end with empty lines. --- compiler/lexer.nim | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/compiler/lexer.nim b/compiler/lexer.nim index bac32b95fc13e..5b5336924b916 100644 --- a/compiler/lexer.nim +++ b/compiler/lexer.nim @@ -834,13 +834,15 @@ proc getString(L: var Lexer, tok: var Token, mode: StringMode) = inc(emptyLines) pos = handleCRLF(L, pos) needIndent = indent + elif c == ')': + add(tok.literal, '\n'.repeat(emptyLines)) + break else: break continue # string block content - while emptyLines > 0: - add(tok.literal, "\n") - dec(emptyLines) + add(tok.literal, '\n'.repeat(emptyLines)) + emptyLines = 0 if c in {CR, LF, nimlexbase.EndOfFile}: add(tok.literal, "\n") pos = handleCRLF(L, pos) From e2a35dbf2099938af7b021afeeeedff2dd4a3dee Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=2E=20K=C3=A4rkk=C3=A4inen?= Date: Wed, 21 Aug 2019 18:02:58 +0300 Subject: [PATCH 3/3] Rewrite block string parser to fix bugs in line number and next token handling. --- compiler/lexer.nim | 82 ++++++++++++++++++++++++---------------------- 1 file changed, 42 insertions(+), 40 deletions(-) diff --git a/compiler/lexer.nim b/compiler/lexer.nim index 5b5336924b916..f58c6fa15341e 100644 --- a/compiler/lexer.nim +++ b/compiler/lexer.nim @@ -816,55 +816,57 @@ proc getString(L: var Lexer, tok: var Token, mode: StringMode) = inc(pos) elif L.buf[pos] == ':' and L.buf[pos + 1] in {CR, LF}: # string block literal - # TODO: Allow end-of-line comment to exist on starting line? - pos = handleCRLF(L, pos + 1) - if mode != normal: tok.tokType = tkRStrLit - else: tok.tokType = tkStrLit + L.bufpos = pos + 1 + tok.tokType = if mode == normal: tkStrLit + else: tkRStrLit let indent = L.currLineIndent + 2 - var needIndent = indent - var emptyLines = 0 while true: - var c = L.buf[pos] - # skip indent and terminate if block ends - if needIndent > 0: - if c == ' ': - dec(needIndent) + # skip indent and/or empty lines without moving lexer + var needIndent = indent + var emptyLines = -1 # account for previous LF still in buffer + var pos = L.bufpos + while needIndent > 0: + var c = L.buf[pos] + if c in {' ', CR, LF}: + if c == ' ': + dec(needIndent) + if c == LF: + inc(emptyLines) + needIndent = indent inc(pos) - elif c in {CR, LF}: - inc(emptyLines) - pos = handleCRLF(L, pos) - needIndent = indent - elif c == ')': - add(tok.literal, '\n'.repeat(emptyLines)) - break else: + # end of block found -> cancel lookahead + pos = L.bufpos + if c != ')': emptyLines = 0 break - continue - # string block content - add(tok.literal, '\n'.repeat(emptyLines)) - emptyLines = 0 - if c in {CR, LF, nimlexbase.EndOfFile}: - add(tok.literal, "\n") - pos = handleCRLF(L, pos) - needIndent = indent - continue - if (c == '\\') and mode == normal: - L.bufpos = pos - if L.buf[pos + 1] in {CR, LF, nimlexbase.EndOfFile}: + if emptyLines > 0: + add(tok.literal, '\n'.repeat(emptyLines)) + # fast-forward lexer to current position + while L.bufpos < pos: + if L.buf[L.bufpos] in {CR, LF}: + L.bufpos = handleCRLF(L, L.bufpos) + else: inc(L.bufpos) - pos = handleCRLF(L, pos + 1) - needIndent = indent - continue + # EXIT if end of block was reached + if needIndent > 0: break + # parse a line of string, break before EOL + while true: + var c = L.buf[L.bufpos] + if c in {CR, LF, nimlexbase.EndOfFile}: + add(tok.literal, "\n") + break + if (c == '\\') and mode == normal: + if L.buf[L.bufpos + 1] in {CR, LF, nimlexbase.EndOfFile}: + inc(L.bufpos) + break + else: + getEscapedChar(L, tok) else: - getEscapedChar(L, tok) - pos = L.bufpos - else: - add(tok.literal, c) - inc(pos) - L.bufpos = pos + add(tok.literal, c) + inc(L.bufpos) if tok.literal == "": lexMessage(L, errGenerated, "string block literal indented by two spaces expected") - tokenEndIgnore(tok, pos) + tokenEndIgnore(tok, L.bufpos) else: # ordinary string literal if mode != normal: tok.tokType = tkRStrLit