Skip to content

Commit 88f74b7

Browse files
fix reinterpret(Char, ::UInt32) for "unnatural" values (fix #29181) (#29192)
This code assumed that `Char` values only ever hold bit patterns that decoding a string can produce, but `reinterpret` can put any bit pattern whatsoever into a `Char`. The fix drops that assumption and uses the boxed-character cache only for actual ASCII characters.
1 parent 1324ceb commit 88f74b7

File tree

2 files changed

+17
-9
lines changed

2 files changed

+17
-9
lines changed

src/datatype.c

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -689,8 +689,9 @@ static jl_value_t *boxed_char_cache[128];
689689
JL_DLLEXPORT jl_value_t *jl_box_char(uint32_t x)
690690
{
691691
jl_ptls_t ptls = jl_get_ptls_states();
692-
if (0 < (int32_t)x)
693-
return boxed_char_cache[x >> 24];
692+
uint32_t u = bswap_32(x);
693+
if (u < 128)
694+
return boxed_char_cache[(uint8_t)u];
694695
jl_value_t *v = jl_gc_alloc(ptls, sizeof(void*), jl_char_type);
695696
*(uint32_t*)jl_data_ptr(v) = x;
696697
return v;

test/char.jl

Lines changed: 14 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -271,11 +271,18 @@ end
271271
@test ncodeunits('\U10ffff') == 4
272272
# invalid encodings
273273
@test ncodeunits(reinterpret(Char, 0x80_00_00_00)) == 1
274-
@test ncodeunits(reinterpret(Char, 0x81_00_00_00)) == 1
275-
@test ncodeunits(reinterpret(Char, 0x80_80_00_00)) == 2
276-
@test ncodeunits(reinterpret(Char, 0x80_01_00_00)) == 2
277-
@test ncodeunits(reinterpret(Char, 0x80_00_80_00)) == 3
278-
@test ncodeunits(reinterpret(Char, 0x80_00_01_00)) == 3
279-
@test ncodeunits(reinterpret(Char, 0x80_00_00_80)) == 4
280-
@test ncodeunits(reinterpret(Char, 0x80_00_00_01)) == 4
274+
@test ncodeunits(reinterpret(Char, 0x01_00_00_00)) == 1
275+
@test ncodeunits(reinterpret(Char, 0x00_80_00_00)) == 2
276+
@test ncodeunits(reinterpret(Char, 0x00_01_00_00)) == 2
277+
@test ncodeunits(reinterpret(Char, 0x00_00_80_00)) == 3
278+
@test ncodeunits(reinterpret(Char, 0x00_00_01_00)) == 3
279+
@test ncodeunits(reinterpret(Char, 0x00_00_00_80)) == 4
280+
@test ncodeunits(reinterpret(Char, 0x00_00_00_01)) == 4
281+
end
282+
283+
@testset "reinterpret(Char, ::UInt32)" begin
284+
for s = 0:31
285+
u = one(UInt32) << s
286+
@test reinterpret(UInt32, reinterpret(Char, u)) === u
287+
end
281288
end

0 commit comments

Comments
 (0)