From 421f25f52c197e27d81d0a281bd4553cacace72e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=2E=20K=C3=A4rkk=C3=A4inen?=
 <tronic@users.noreply.github.com>
Date: Sun, 18 Aug 2019 14:10:06 +0300
Subject: [PATCH 1/3] Implement string block literals.

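A string literal opened with `":` immediately followed by a line break
continues on the following lines. Content lines must be indented by at least
two spaces more than the line that starts the literal; that indentation is
stripped, and the literal ends at the first non-blank line with less
indentation. With the `r` prefix no escapes are processed; in normal mode a
backslash at the end of a line joins it with the next line without emitting a
newline. Examples:

let str = ":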
    formatted
        text
         is
      possible
echo str

echo(
  r":
    def foo():
        """Generated Python function without any escaping"""
        print("".join(["foo", "bar"]))
        print("Hello\nWorld!")
)

echo ":
  <pre>\
  lines split only in source code, \
  no newlines in output\
  </pre>\
---
 compiler/lexer.nim | 49 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)
diff --git a/compiler/lexer.nim b/compiler/lexer.nim
index ff433928c9fab..bac32b95fc13e 100644
--- a/compiler/lexer.nim
+++ b/compiler/lexer.nim
@@ -814,6 +814,55 @@ proc getString(L: var Lexer, tok: var Token, mode: StringMode) =
       else:
         tok.literal.add(L.buf[pos])
         inc(pos)
+  elif L.buf[pos] == ':' and L.buf[pos + 1] in {CR, LF}:
+    # string block literal
+    # TODO: Allow end-of-line comment to exist on starting line?
+    pos = handleCRLF(L, pos + 1)
+    if mode != normal: tok.tokType = tkRStrLit
+    else: tok.tokType = tkStrLit
+    let indent = L.currLineIndent + 2
+    var needIndent = indent
+    var emptyLines = 0
+    while true:
+      var c = L.buf[pos]
+      # skip indent and terminate if block ends
+      if needIndent > 0:
+        if c == ' ':
+          dec(needIndent)
+          inc(pos)
+        elif c in {CR, LF}:
+          inc(emptyLines)
+          pos = handleCRLF(L, pos)
+          needIndent = indent
+        else:
+          break
+        continue
+      # string block content
+      while emptyLines > 0:
+        add(tok.literal, "\n")
+        dec(emptyLines)
+      if c in {CR, LF, nimlexbase.EndOfFile}:
+        add(tok.literal, "\n")
+        pos = handleCRLF(L, pos)
+        needIndent = indent
+        continue
+      if (c == '\\') and mode == normal:
+        L.bufpos = pos
+        if L.buf[pos + 1] in {CR, LF, nimlexbase.EndOfFile}:
+          inc(L.bufpos)
+          pos = handleCRLF(L, pos + 1)
+          needIndent = indent
+          continue
+        else:
+          getEscapedChar(L, tok)
+          pos = L.bufpos
+      else:
+        add(tok.literal, c)
+        inc(pos)
+        L.bufpos = pos
+    if tok.literal == "":
+      lexMessage(L, errGenerated, "string block literal indented by two spaces expected")
+      tokenEndIgnore(tok, pos)
   else:
     # ordinary string literal
     if mode != normal: tok.tokType = tkRStrLit

From 1839c6fcddbbcb8c3751d8afae2a50a235c49744 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=2E=20K=C3=A4rkk=C3=A4inen?=
 <tronic@users.noreply.github.com>
Date: Wed, 21 Aug 2019 13:54:19 +0300
Subject: [PATCH 2/3] Parenthesized block string literals may end with empty
 lines.

---
 compiler/lexer.nim | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/compiler/lexer.nim b/compiler/lexer.nim
index bac32b95fc13e..5b5336924b916 100644
--- a/compiler/lexer.nim
+++ b/compiler/lexer.nim
@@ -834,13 +834,15 @@ proc getString(L: var Lexer, tok: var Token, mode: StringMode) =
           inc(emptyLines)
           pos = handleCRLF(L, pos)
           needIndent = indent
+        elif c == ')':
+          add(tok.literal, '\n'.repeat(emptyLines))
+          break
         else:
           break
         continue
       # string block content
-      while emptyLines > 0:
-        add(tok.literal, "\n")
-        dec(emptyLines)
+      add(tok.literal, '\n'.repeat(emptyLines))
+      emptyLines = 0
       if c in {CR, LF, nimlexbase.EndOfFile}:
         add(tok.literal, "\n")
         pos = handleCRLF(L, pos)

From e2a35dbf2099938af7b021afeeeedff2dd4a3dee Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?L=2E=20K=C3=A4rkk=C3=A4inen?=
 <tronic@users.noreply.github.com>
Date: Wed, 21 Aug 2019 18:02:58 +0300
Subject: [PATCH 3/3] Rewrite block string parser to fix bugs in line number
 and next token handling.

---
 compiler/lexer.nim | 82 ++++++++++++++++++++++++----------------------
 1 file changed, 42 insertions(+), 40 deletions(-)

diff --git a/compiler/lexer.nim b/compiler/lexer.nim
index 5b5336924b916..f58c6fa15341e 100644
--- a/compiler/lexer.nim
+++ b/compiler/lexer.nim
@@ -816,55 +816,57 @@ proc getString(L: var Lexer, tok: var Token, mode: StringMode) =
         inc(pos)
   elif L.buf[pos] == ':' and L.buf[pos + 1] in {CR, LF}:
     # string block literal
-    # TODO: Allow end-of-line comment to exist on starting line?
-    pos = handleCRLF(L, pos + 1)
-    if mode != normal: tok.tokType = tkRStrLit
-    else: tok.tokType = tkStrLit
+    L.bufpos = pos + 1
+    tok.tokType = if mode == normal: tkStrLit
+      else: tkRStrLit
     let indent = L.currLineIndent + 2
-    var needIndent = indent
-    var emptyLines = 0
     while true:
-      var c = L.buf[pos]
-      # skip indent and terminate if block ends
-      if needIndent > 0:
-        if c == ' ':
-          dec(needIndent)
+      # skip indent and/or empty lines without moving lexer
+      var needIndent = indent
+      var emptyLines = -1  # account for previous LF still in buffer
+      var pos = L.bufpos
+      while needIndent > 0:
+        var c = L.buf[pos]
+        if c in {' ', CR, LF}:
+          if c == ' ':
+            dec(needIndent)
+          if c == LF:
+            inc(emptyLines)
+            needIndent = indent
           inc(pos)
-        elif c in {CR, LF}:
-          inc(emptyLines)
-          pos = handleCRLF(L, pos)
-          needIndent = indent
-        elif c == ')':
-          add(tok.literal, '\n'.repeat(emptyLines))
-          break
         else:
+          # end of block found -> cancel lookahead
+          pos = L.bufpos
+          if c != ')': emptyLines = 0
           break
-        continue
-      # string block content
-      add(tok.literal, '\n'.repeat(emptyLines))
-      emptyLines = 0
-      if c in {CR, LF, nimlexbase.EndOfFile}:
-        add(tok.literal, "\n")
-        pos = handleCRLF(L, pos)
-        needIndent = indent
-        continue
-      if (c == '\\') and mode == normal:
-        L.bufpos = pos
-        if L.buf[pos + 1] in {CR, LF, nimlexbase.EndOfFile}:
+      if emptyLines > 0:
+        add(tok.literal, '\n'.repeat(emptyLines))
+      # fast-forward lexer to current position
+      while L.bufpos < pos:
+        if L.buf[L.bufpos] in {CR, LF}:
+          L.bufpos = handleCRLF(L, L.bufpos)
+        else:
           inc(L.bufpos)
-          pos = handleCRLF(L, pos + 1)
-          needIndent = indent
-          continue
+      # EXIT if end of block was reached
+      if needIndent > 0: break
+      # parse a line of string, break before EOL
+      while true:
+        var c = L.buf[L.bufpos]
+        if c in {CR, LF, nimlexbase.EndOfFile}:
+          add(tok.literal, "\n")
+          break
+        if (c == '\\') and mode == normal:
+          if L.buf[L.bufpos + 1] in {CR, LF, nimlexbase.EndOfFile}:
+            inc(L.bufpos)
+            break
+          else:
+            getEscapedChar(L, tok)
         else:
-          getEscapedChar(L, tok)
-          pos = L.bufpos
-      else:
-        add(tok.literal, c)
-        inc(pos)
-        L.bufpos = pos
+          add(tok.literal, c)
+          inc(L.bufpos)
     if tok.literal == "":
       lexMessage(L, errGenerated, "string block literal indented by two spaces expected")
-      tokenEndIgnore(tok, pos)
+      tokenEndIgnore(tok, L.bufpos)
   else:
     # ordinary string literal
     if mode != normal: tok.tokType = tkRStrLit