Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

allow escaping newlines with \ inside strings #40753

Merged
merged 7 commits into from
Jun 3, 2021
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ New language features
in `[A; B]` has always described concatenating along the first dimension (vertically), now two
semicolons `[A;; B]` do so in the second dimension (horizontally), three semicolons `;;;` in the
third, and so on. ([#33697])
* A backslash (`\`) before a newline inside a string literal now escapes the newline while also
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's unclear what "escapes" means here; sometimes escaping a character means including it, e.g. "\"".

There are also manual sections on string literals that should mention this.

respecting indentation. This can be used to split up long strings without newlines into multiple
lines of code. ([#40753])

Language changes
----------------
Expand Down Expand Up @@ -102,6 +105,8 @@ Standard library changes
* `@lock` is now exported from Base ([#39588]).
* The experimental function `Base.catch_stack()` has been renamed to `current_exceptions()`, exported from Base and given a more specific return type ([#29901]).
* Some degree trigonometric functions, `sind`, `cosd`, `tand`, `asind`, `acosd`, `asecd`, `acscd`, `acotd`, `atand` now accept a square matrix ([#39758]).
* A backslash before a newline in command literals now always escapes the newline, similar to standard string
literals, whereas the result was not well-defined before. ([#40753])

#### Package Manager

Expand Down
9 changes: 6 additions & 3 deletions base/shell.jl
Original file line number Diff line number Diff line change
Expand Up @@ -87,15 +87,18 @@ function shell_parse(str::AbstractString, interpolate::Bool=true;
elseif !in_single_quotes && c == '"'
in_double_quotes = !in_double_quotes
i = consume_upto!(arg, s, i, j)
elseif c == '\\'
if in_double_quotes
elseif !in_single_quotes && c == '\\'
if !isempty(st) && peek(st)[2] == '\n'
i = consume_upto!(arg, s, i, j) + 1
_ = popfirst!(st)
elseif in_double_quotes
isempty(st) && error("unterminated double quote")
k, c′ = peek(st)
if c′ == '"' || c′ == '$' || c′ == '\\'
i = consume_upto!(arg, s, i, j)
_ = popfirst!(st)
end
elseif !in_single_quotes
else
isempty(st) && error("dangling backslash")
i = consume_upto!(arg, s, i, j)
_ = popfirst!(st)
Expand Down
42 changes: 32 additions & 10 deletions src/julia-parser.scm
Original file line number Diff line number Diff line change
Expand Up @@ -311,6 +311,9 @@
;; Validate a parsed numeric constant: hand back N when it is a true value,
;; otherwise raise an error whose message quotes the source text S.
(define (numchk n s)
  (if n
      n
      (error (string "invalid numeric constant \"" s "\""))))

;; Return the final character of string S: take `(length s)`, step back one
;; character from it with `string.dec`, and read the character found there.
(define (string-lastchar s)
  (let* ((end-offset  (length s))
         (last-offset (string.dec s end-offset)))
    (string.char s last-offset)))

(define (read-number port leadingdot neg)
(let ((str (open-output-string))
(pred char-numeric?)
Expand Down Expand Up @@ -412,7 +415,7 @@
(string.sub s 1)
s)
r is-float32-literal)))
(if (and (eqv? #\. (string.char s (string.dec s (length s))))
(if (and (eqv? #\. (string-lastchar s))
(let ((nxt (peek-char port)))
(and (not (eof-object? nxt))
(or (identifier-start-char? nxt)
Expand Down Expand Up @@ -2182,16 +2185,35 @@
;; Run `unescape-string` over STRS through `map-at` with the `even?` selector.
;; NOTE(review): presumably the selected positions hold the literal string
;; pieces and the remaining ones are interpolated expressions -- confirm
;; against the definition of `map-at`.
(define (unescape-parsed-string-literal strs)
  (map-at even?
          unescape-string
          strs))

;; Remove every `\` that is immediately followed by a newline, together with
;; that newline, from string S and return the resulting string.
;; A `\` followed by any other character is copied through along with that
;; character, so the pair is consumed as a unit (e.g. in `\\` + newline the
;; newline survives because the second `\` was paired with the first).
;; A lone `\` at the very end of S is kept rather than silently dropped.
(define (strip-escaped-newline s)
  (let ((in  (open-input-string s))
        (out (open-output-string)))
    ;; PRECEDING-BACKSLASH? is true when the previous character was an
    ;; unpaired `\` that has not been written to OUT yet.
    (define (loop preceding-backslash?)
      (let ((c (read-char in)))
        (cond ((eof-object? c)
               ;; flush a pending backslash so input ending in `\` is not truncated
               (if preceding-backslash? (write-char #\\ out)))
              (preceding-backslash?
               ;; only the `\` + newline pair is dropped; any other pair is kept
               (if (not (eqv? c #\newline))
                   (begin (write-char #\\ out) (write-char c out)))
               (loop #f))
              ((eqv? c #\\) (loop #t))
              (else (write-char c out) (loop #f)))))
    (loop #f)
    (io.tostring! out)))

;; Parse a string literal from token stream S whose delimiter character is
;; DELIM; when RAW is true the result is returned without unescaping.
;; If the next character on the port is DELIM, a further DELIM selects the
;; triple-delimited path (`parse-string-literal-` called with 2, the result
;; dedented and the leading newline stripped from its first element);
;; a single repeated DELIM yields (list "").  Otherwise
;; `parse-string-literal-` is called with 0.
;; NOTE(review): the lines below look like a diff rendering that shows the
;; previous body (starting `(let ((p ...`) immediately followed by its
;; replacement (starting `(let* ((p ...`) -- confirm against the repository
;; file.  In the replacement, non-raw results have `strip-escaped-newline`
;; applied to each string element before `unescape-parsed-string-literal`.
(define (parse-string-literal s delim raw)
(let ((p (ts:port s)))
((if raw identity unescape-parsed-string-literal)
(if (eqv? (peek-char p) delim)
(if (eqv? (peek-char (take-char p)) delim)
(map-first strip-leading-newline
(dedent-triplequoted-string
(parse-string-literal- 2 (take-char p) s delim raw)))
(list ""))
(parse-string-literal- 0 p s delim raw)))))
(let* ((p (ts:port s))
(str (if (eqv? (peek-char p) delim)
(if (eqv? (peek-char (take-char p)) delim)
(map-first strip-leading-newline
(dedent-triplequoted-string
(parse-string-literal- 2 (take-char p) s delim raw)))
(list ""))
(parse-string-literal- 0 p s delim raw))))
(if raw str (unescape-parsed-string-literal
(map (lambda (s)
(if (string? s) (strip-escaped-newline s) s))
str)))))

(define (strip-leading-newline s)
(let ((n (sizeof s)))
Expand Down
97 changes: 97 additions & 0 deletions test/syntax.jl
Original file line number Diff line number Diff line change
Expand Up @@ -2830,3 +2830,100 @@ end
x[3], x[1:2]... = x
@test x == [2, 3, 1]
end

# Checks how a backslash placed directly before a newline is treated in
# string literals, triple-quoted strings, command literals and
# triple-backtick command literals.
# NOTE(review): leading whitespace inside the multi-line literals appears to
# have been lost in this rendering (the first two tests read identically but
# expect different results) -- confirm against the repository file.
@testset "escaping newlines inside strings" begin
c = "c"
# `c` is interpolated into some of the literals below.

# Ordinary string literals: backslash-newline joins the two lines; the raw
# literal keeps both the backslash and the newline; a doubled backslash
# before the newline leaves the newline in place.
@test "a\
b" == "ab"
@test "a\
b" == "a b"
@test raw"a\
b" == "a\\\nb"
@test "a$c\
b" == "acb"
@test "\\
" == "\\\n"


# Same cases for triple-quoted string literals.
@test """
a\
b""" == "ab"
@test """
a\
b""" == "a b"
@test """
a\
b""" == " ab"
@test raw"""
a\
b""" == "a\\\nb"
@test """
a$c\
b""" == "acb"

# Runs of backslashes before the newline: with an odd count the last one
# removes the newline, with an even count the newline is kept.
@test """
\
""" == ""
@test """
\\
""" == "\\\n"
@test """
\\\
""" == "\\"
@test """
\\\\
""" == "\\\\\n"
@test """
\\\\\
""" == "\\\\"
@test """
\
\
""" == ""
@test """
\\
\
""" == "\\\n"
@test """
\\\
\
""" == "\\"


# Command literals: backslash-newline is removed outside quotes and inside
# double quotes; inside single quotes the backslash and newline are kept.
@test `a\
b` == `ab`
@test `a\
b` == `a b`
@test `a$c\
b` == `acb`
@test `"a\
b"` == `ab`
@test `'a\
b'` == `$("a\\\nb")`
@test `\\
` == `'\'`


# Same cases for triple-backtick command literals.
@test ```
a\
b``` == `ab`
@test ```
a\
b``` == `a b`
@test ```
a\
b``` == ` ab`
@test ```
a$c\
b``` == `acb`
@test ```
"a\
b"``` == `ab`
@test ```
'a\
b'``` == `$("a\\\nb")`
@test ```
\\
``` == `'\'`
end