Skip to content

Commit d5f1dca

Browse files
authored
allow escaping newlines with \ inside strings (#40753)
This allows the use of `\` in front of newlines inside non-raw/non-custom string or command literals as a line continuation character, so the following newline is ignored. This way, long strings without any newlines in them don't have to be written on a single line or be broken up. I think we might also want to use this to improve the printing of long strings in the REPL by printing them as multiline strings, making use of `\` for long lines if necessary, but that can be discussed separately. The command literal part is technically breaking, but the current behavior is probably unintuitive enough that this can be considered a minor change. For string literals, this should be entirely non-breaking since a single `\` before a newline currently throws a parsing error. closes #37728
1 parent 9f32653 commit d5f1dca

File tree

5 files changed

+157
-13
lines changed

5 files changed

+157
-13
lines changed

NEWS.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@ New language features
1616
in `[A; B]` has always described concatenating along the first dimension (vertically), now two
1717
semicolons `[A;; B]` do so in the second dimension (horizontally), three semicolons `;;;` in the
1818
third, and so on. ([#33697])
19+
* A backslash (`\`) before a newline inside a string literal now removes the newline while also
20+
respecting indentation. This can be used to split up long strings without newlines into multiple
21+
lines of code. ([#40753])
1922

2023
Language changes
2124
----------------
@@ -114,6 +117,8 @@ Standard library changes
114117
* `@lock` is now exported from Base ([#39588]).
115118
* The experimental function `Base.catch_stack()` has been renamed to `current_exceptions()`, exported from Base and given a more specific return type ([#29901]).
116119
* Some degree trigonometric functions, `sind`, `cosd`, `tand`, `asind`, `acosd`, `asecd`, `acscd`, `acotd`, `atand` now accept a square matrix ([#39758]).
120+
* A backslash before a newline in command literals now always removes the newline, similar to standard string
121+
literals, whereas the result was not well-defined before. ([#40753])
117122

118123
#### Package Manager
119124

base/shell.jl

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -87,15 +87,18 @@ function shell_parse(str::AbstractString, interpolate::Bool=true;
8787
elseif !in_single_quotes && c == '"'
8888
in_double_quotes = !in_double_quotes
8989
i = consume_upto!(arg, s, i, j)
90-
elseif c == '\\'
91-
if in_double_quotes
90+
elseif !in_single_quotes && c == '\\'
91+
if !isempty(st) && peek(st)[2] == '\n'
92+
i = consume_upto!(arg, s, i, j) + 1
93+
_ = popfirst!(st)
94+
elseif in_double_quotes
9295
isempty(st) && error("unterminated double quote")
9396
k, c′ = peek(st)
9497
if c′ == '"' || c′ == '$' || c′ == '\\'
9598
i = consume_upto!(arg, s, i, j)
9699
_ = popfirst!(st)
97100
end
98-
elseif !in_single_quotes
101+
else
99102
isempty(st) && error("dangling backslash")
100103
i = consume_upto!(arg, s, i, j)
101104
_ = popfirst!(st)

doc/src/manual/strings.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,14 @@ julia> """Contains "quote" characters"""
166166
"Contains \"quote\" characters"
167167
```
168168

169+
Long lines in strings can be broken up by preceding the newline with a backslash (`\`):
170+
171+
```jldoctest
172+
julia> "This is a long \
173+
line"
174+
"This is a long line"
175+
```
176+
169177
If you want to extract a character from a string, you index into it:
170178

171179
```jldoctest helloworldstring
@@ -639,6 +647,15 @@ julia> """
639647
"Hello,\nworld."
640648
```
641649

650+
If the newline is removed using a backslash, dedentation will be respected as well:
651+
652+
```jldoctest
653+
julia> """
654+
Averylong\
655+
word"""
656+
"Averylongword"
657+
```
658+
642659
Trailing whitespace is left unaltered.
643660

644661
Triple-quoted string literals can contain `"` characters without escaping.

src/julia-parser.scm

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,9 @@
311311
(define (numchk n s)
312312
(or n (error (string "invalid numeric constant \"" s "\""))))
313313

314+
(define (string-lastchar s)
315+
(string.char s (string.dec s (length s))))
316+
314317
(define (read-number port leadingdot neg)
315318
(let ((str (open-output-string))
316319
(pred char-numeric?)
@@ -412,7 +415,7 @@
412415
(string.sub s 1)
413416
s)
414417
r is-float32-literal)))
415-
(if (and (eqv? #\. (string.char s (string.dec s (length s))))
418+
(if (and (eqv? #\. (string-lastchar s))
416419
(let ((nxt (peek-char port)))
417420
(and (not (eof-object? nxt))
418421
(or (identifier-start-char? nxt)
@@ -2182,16 +2185,35 @@
21822185
(define (unescape-parsed-string-literal strs)
21832186
(map-at even? unescape-string strs))
21842187

2188+
;; remove `\` followed by a newline
2189+
(define (strip-escaped-newline s)
2190+
(let ((in (open-input-string s))
2191+
(out (open-output-string)))
2192+
(define (loop preceding-backslash?)
2193+
(let ((c (read-char in)))
2194+
(cond ((eof-object? c))
2195+
(preceding-backslash?
2196+
(if (not (eqv? c #\newline))
2197+
(begin (write-char #\\ out) (write-char c out)))
2198+
(loop #f))
2199+
((eqv? c #\\) (loop #t))
2200+
(else (write-char c out) (loop #f)))))
2201+
(loop #f)
2202+
(io.tostring! out)))
2203+
21852204
(define (parse-string-literal s delim raw)
2186-
(let ((p (ts:port s)))
2187-
((if raw identity unescape-parsed-string-literal)
2188-
(if (eqv? (peek-char p) delim)
2189-
(if (eqv? (peek-char (take-char p)) delim)
2190-
(map-first strip-leading-newline
2191-
(dedent-triplequoted-string
2192-
(parse-string-literal- 2 (take-char p) s delim raw)))
2193-
(list ""))
2194-
(parse-string-literal- 0 p s delim raw)))))
2205+
(let* ((p (ts:port s))
2206+
(str (if (eqv? (peek-char p) delim)
2207+
(if (eqv? (peek-char (take-char p)) delim)
2208+
(map-first strip-leading-newline
2209+
(dedent-triplequoted-string
2210+
(parse-string-literal- 2 (take-char p) s delim raw)))
2211+
(list ""))
2212+
(parse-string-literal- 0 p s delim raw))))
2213+
(if raw str (unescape-parsed-string-literal
2214+
(map (lambda (s)
2215+
(if (string? s) (strip-escaped-newline s) s))
2216+
str)))))
21952217

21962218
(define (strip-leading-newline s)
21972219
(let ((n (sizeof s)))

test/syntax.jl

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2830,3 +2830,100 @@ end
28302830
x[3], x[1:2]... = x
28312831
@test x == [2, 3, 1]
28322832
end
2833+
2834+
@testset "escaping newlines inside strings" begin
2835+
c = "c"
2836+
2837+
@test "a\
2838+
b" == "ab"
2839+
@test "a\
2840+
b" == "a b"
2841+
@test raw"a\
2842+
b" == "a\\\nb"
2843+
@test "a$c\
2844+
b" == "acb"
2845+
@test "\\
2846+
" == "\\\n"
2847+
2848+
2849+
@test """
2850+
a\
2851+
b""" == "ab"
2852+
@test """
2853+
a\
2854+
b""" == "a b"
2855+
@test """
2856+
a\
2857+
b""" == " ab"
2858+
@test raw"""
2859+
a\
2860+
b""" == "a\\\nb"
2861+
@test """
2862+
a$c\
2863+
b""" == "acb"
2864+
2865+
@test """
2866+
\
2867+
""" == ""
2868+
@test """
2869+
\\
2870+
""" == "\\\n"
2871+
@test """
2872+
\\\
2873+
""" == "\\"
2874+
@test """
2875+
\\\\
2876+
""" == "\\\\\n"
2877+
@test """
2878+
\\\\\
2879+
""" == "\\\\"
2880+
@test """
2881+
\
2882+
\
2883+
""" == ""
2884+
@test """
2885+
\\
2886+
\
2887+
""" == "\\\n"
2888+
@test """
2889+
\\\
2890+
\
2891+
""" == "\\"
2892+
2893+
2894+
@test `a\
2895+
b` == `ab`
2896+
@test `a\
2897+
b` == `a b`
2898+
@test `a$c\
2899+
b` == `acb`
2900+
@test `"a\
2901+
b"` == `ab`
2902+
@test `'a\
2903+
b'` == `$("a\\\nb")`
2904+
@test `\\
2905+
` == `'\'`
2906+
2907+
2908+
@test ```
2909+
a\
2910+
b``` == `ab`
2911+
@test ```
2912+
a\
2913+
b``` == `a b`
2914+
@test ```
2915+
a\
2916+
b``` == ` ab`
2917+
@test ```
2918+
a$c\
2919+
b``` == `acb`
2920+
@test ```
2921+
"a\
2922+
b"``` == `ab`
2923+
@test ```
2924+
'a\
2925+
b'``` == `$("a\\\nb")`
2926+
@test ```
2927+
\\
2928+
``` == `'\'`
2929+
end

0 commit comments

Comments
 (0)