From 6f454e88c59c7bf7a0ca81840d1101e32a084b27 Mon Sep 17 00:00:00 2001 From: Simeon Schaub Date: Sat, 8 May 2021 22:10:55 +0200 Subject: [PATCH] allow escaping newlines with `\` inside strings This allows the use of `\` in front of newlines inside non-raw/non-custom string or command literals as a line continuation character, so the following newline is ignored. This way, long strings without any newlines in them don't have to be written on a single line or be broken up. I think we might also want to use this to improve the printing of long strings in the REPL by printing them as multiline strings, making use of `\` for long lines if necessary, but that can be discussed separately. The command literal part is technically breaking, but the current behavior is probably unintuitive enough that this can be considered a minor change. For string literals, this should be entirely non-breaking since a single `\` before a newline currently throws a parsing error. closes #37728 --- base/shell.jl | 5 ++- src/julia-parser.scm | 28 ++++++++++++----- test/syntax.jl | 72 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 96 insertions(+), 9 deletions(-) diff --git a/base/shell.jl b/base/shell.jl index 48fcb4079bda0c..6ee42c40f16912 100644 --- a/base/shell.jl +++ b/base/shell.jl @@ -88,7 +88,10 @@ function shell_parse(str::AbstractString, interpolate::Bool=true; in_double_quotes = !in_double_quotes i = consume_upto!(arg, s, i, j) elseif c == '\\' - if in_double_quotes + if !isempty(st) && peek(st)[2] == '\n' + i = consume_upto!(arg, s, i, j) + 1 + _ = popfirst!(st) + elseif in_double_quotes isempty(st) && error("unterminated double quote") k, c′ = peek(st) if c′ == '"' || c′ == '$' || c′ == '\\' diff --git a/src/julia-parser.scm b/src/julia-parser.scm index c6510dcbd9536c..c402c380863216 100644 --- a/src/julia-parser.scm +++ b/src/julia-parser.scm @@ -311,6 +311,9 @@ (define (numchk n s) (or n (error (string "invalid numeric constant \"" s "\"")))) +(define 
(string-lastchar s) + (string.char s (string.dec s (length s)))) + (define (read-number port leadingdot neg) (let ((str (open-output-string)) (pred char-numeric?) @@ -408,7 +411,7 @@ (string.sub s 1) s) r is-float32-literal))) - (if (and (eqv? #\. (string.char s (string.dec s (length s)))) + (if (and (eqv? #\. (string-lastchar s)) (let ((nxt (peek-char port))) (and (not (eof-object? nxt)) (or (identifier-start-char? nxt) @@ -2114,13 +2117,22 @@ (define (parse-string-literal s delim raw) (let ((p (ts:port s))) ((if raw identity unescape-parsed-string-literal) - (if (eqv? (peek-char p) delim) - (if (eqv? (peek-char (take-char p)) delim) - (map-first strip-leading-newline - (dedent-triplequoted-string - (parse-string-literal- 2 (take-char p) s delim raw))) - (list "")) - (parse-string-literal- 0 p s delim raw))))) + (map (lambda (s) (if (and (not raw) (string? s)) + (let ((spl (string-split s "\\\n"))) + (foldl (lambda (line s) + (if (and (> (length s) 0) (eqv? (string-lastchar s) #\\)) + (string s "\\\n" line) + (string s line))) + "" + spl)) + s)) + (if (eqv? (peek-char p) delim) + (if (eqv? 
(peek-char (take-char p)) delim) + (map-first strip-leading-newline + (dedent-triplequoted-string + (parse-string-literal- 2 (take-char p) s delim raw))) + (list "")) + (parse-string-literal- 0 p s delim raw)))))) (define (strip-leading-newline s) (let ((n (sizeof s))) diff --git a/test/syntax.jl b/test/syntax.jl index d934e9358baaca..0a6af1b15f21b3 100644 --- a/test/syntax.jl +++ b/test/syntax.jl @@ -2787,3 +2787,75 @@ macro m_nospecialize_unnamed_hygiene() end @test @m_nospecialize_unnamed_hygiene()(1) === Any + +@testset "escaping newlines inside strings" begin + c = "c" + + @test "a\ +b" == "ab" + @test "a\ + b" == "a b" + @test raw"a\ +b" == "a\\\nb" + @test "a$c\ +b" == "acb" + @test "\\ +" == "\\\n" + + + @test """ + a\ + b""" == "ab" + @test """ + a\ + b""" == "a b" + @test """ + a\ + b""" == " ab" + @test raw""" + a\ + b""" == "a\\\nb" + @test """ + a$c\ + b""" == "acb" + @test """ + \\ + """ == "\\\n" + + + @test `a\ +b` == `ab` + @test `a\ + b` == `a b` + @test `a$c\ +b` == `acb` + @test `"a\ +b"` == `ab` + @test `'a\ +b'` == `ab` + @test `\\ +` == `'\'` + + + @test ``` + a\ + b``` == `ab` + @test ``` + a\ + b``` == `a b` + @test ``` + a\ + b``` == ` ab` + @test ``` + a$c\ + b``` == `acb` + @test ``` + "a\ + b"``` == `ab` + @test ``` + 'a\ + b'``` == `ab` + @test ``` + \\ + ``` == `'\'` +end