Skip to content

Commit bb732b1

Browse files
StefanKarpinski authored and KristofferC committed
fix reinterpret(Char, ::UInt32) for "unnatural" values (fix #29181) (#29192)
This code assumed that character values only have bit patterns that decoding a string can produce, but `reinterpret` can produce any bit pattern whatsoever in a `Char`. The fix no longer relies on that assumption and uses the cache only for actual ASCII characters. (cherry picked from commit 88f74b7)
1 parent 1a96057 commit bb732b1

File tree

2 files changed

+33
-2
lines changed

2 files changed

+33
-2
lines changed

src/datatype.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -688,8 +688,9 @@ static jl_value_t *boxed_char_cache[128];
688688
JL_DLLEXPORT jl_value_t *jl_box_char(uint32_t x)
689689
{
690690
jl_ptls_t ptls = jl_get_ptls_states();
691-
if (0 < (int32_t)x)
692-
return boxed_char_cache[x >> 24];
691+
uint32_t u = bswap_32(x);
692+
if (u < 128)
693+
return boxed_char_cache[(uint8_t)u];
693694
jl_value_t *v = jl_gc_alloc(ptls, sizeof(void*), jl_char_type);
694695
*(uint32_t*)jl_data_ptr(v) = x;
695696
return v;

test/char.jl

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,3 +256,33 @@ Base.codepoint(c::ASCIIChar) = reinterpret(UInt8, c)
256256
@test_throws MethodError write(IOBuffer(), ASCIIChar('x'))
257257
@test_throws MethodError read(IOBuffer('x'), ASCIIChar)
258258
end
259+
260+
@testset "ncodeunits(::Char)" begin
261+
# valid encodings
262+
@test ncodeunits('\0') == 1
263+
@test ncodeunits('\x1') == 1
264+
@test ncodeunits('\x7f') == 1
265+
@test ncodeunits('\u80') == 2
266+
@test ncodeunits('\uff') == 2
267+
@test ncodeunits('\u7ff') == 2
268+
@test ncodeunits('\u800') == 3
269+
@test ncodeunits('\uffff') == 3
270+
@test ncodeunits('\U10000') == 4
271+
@test ncodeunits('\U10ffff') == 4
272+
# invalid encodings
273+
@test ncodeunits(reinterpret(Char, 0x80_00_00_00)) == 1
274+
@test ncodeunits(reinterpret(Char, 0x01_00_00_00)) == 1
275+
@test ncodeunits(reinterpret(Char, 0x00_80_00_00)) == 2
276+
@test ncodeunits(reinterpret(Char, 0x00_01_00_00)) == 2
277+
@test ncodeunits(reinterpret(Char, 0x00_00_80_00)) == 3
278+
@test ncodeunits(reinterpret(Char, 0x00_00_01_00)) == 3
279+
@test ncodeunits(reinterpret(Char, 0x00_00_00_80)) == 4
280+
@test ncodeunits(reinterpret(Char, 0x00_00_00_01)) == 4
281+
end
282+
283+
@testset "reinterpret(Char, ::UInt32)" begin
284+
for s = 0:31
285+
u = one(UInt32) << s
286+
@test reinterpret(UInt32, reinterpret(Char, u)) === u
287+
end
288+
end

0 commit comments

Comments
 (0)