Skip to content

Commit 6bf51a1

Browse files
vtjnashNHDaly
authored andcommitted
remove wasted allocation space for JL_ARRAY_IMPL_NUL (#51275)
Handle this more correctly in Cstring conversion instead. Although apparently this feature and test has not worked in Julia since String became a first class object (and not wrapping Array). Refs: cee4cb7
1 parent 35791e2 commit 6bf51a1

File tree

5 files changed

+35
-105
lines changed

5 files changed

+35
-105
lines changed

base/c.jl

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -199,7 +199,7 @@ cconvert(::Type{Cstring}, s::AbstractString) =
199199

200200
function cconvert(::Type{Cwstring}, s::AbstractString)
201201
v = transcode(Cwchar_t, String(s))
202-
!isempty(v) && v[end] == 0 || push!(v, 0)
202+
push!(v, 0)
203203
return v
204204
end
205205

@@ -211,7 +211,7 @@ containsnul(p::Ptr, len) =
211211
containsnul(s::String) = containsnul(unsafe_convert(Ptr{Cchar}, s), sizeof(s))
212212
containsnul(s::AbstractString) = '\0' in s
213213

214-
function unsafe_convert(::Type{Cstring}, s::Union{String,AbstractVector{UInt8}})
214+
function unsafe_convert(::Type{Cstring}, s::String)
215215
p = unsafe_convert(Ptr{Cchar}, s)
216216
containsnul(p, sizeof(s)) &&
217217
throw(ArgumentError("embedded NULs are not allowed in C strings: $(repr(s))"))

src/array.c

Lines changed: 33 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,15 @@
1616
extern "C" {
1717
#endif
1818

19-
#define JL_ARRAY_IMPL_NUL 1
19+
// similar to MEMDEBUG's purpose, this ensures there is a garbage byte after a
20+
// most UInt8 arrays (even String-backed) so that almost any C string
21+
// processing on it is guaranteed to run too far and return some garbage
22+
// answers to warn the user of their bug.
23+
#ifdef NDEBUG
24+
#define JL_ARRAY_MEMDEBUG_TERMINATOR 0
25+
#else
26+
#define JL_ARRAY_MEMDEBUG_TERMINATOR 1
27+
#endif
2028

2129
#define JL_ARRAY_ALIGN(jl_value, nbytes) LLT_ALIGN(jl_value, nbytes)
2230

@@ -110,7 +118,7 @@ static jl_array_t *_new_array_(jl_value_t *atype, uint32_t ndims, size_t *dims,
110118
else if (validated == 2)
111119
jl_error("invalid Array size");
112120
if (isunboxed) {
113-
if (elsz == 1 && !isunion) {
121+
if (JL_ARRAY_MEMDEBUG_TERMINATOR && elsz == 1 && !isunion) {
114122
// extra byte for all julia allocated byte arrays
115123
tot++;
116124
}
@@ -150,9 +158,11 @@ static jl_array_t *_new_array_(jl_value_t *atype, uint32_t ndims, size_t *dims,
150158

151159
if (zeroinit)
152160
memset(data, 0, tot);
161+
if (JL_ARRAY_MEMDEBUG_TERMINATOR && elsz == 1 && !isunion) {
162+
((char*)data)[tot - 1] = 0xfe;
163+
msan_allocated_memory(&((char*)data)[tot - 1], 1);
164+
}
153165
a->data = data;
154-
if (JL_ARRAY_IMPL_NUL && elsz == 1)
155-
((char*)data)[tot - 1] = '\0';
156166
a->length = nel;
157167
a->flags.ndims = ndims;
158168
a->flags.ptrarray = !isunboxed;
@@ -345,7 +355,7 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array_1d(jl_value_t *atype, void *data,
345355
if (own_buffer) {
346356
a->flags.how = 2;
347357
jl_gc_track_malloced_array(ct->ptls, a);
348-
jl_gc_count_allocd(nel*elsz + (elsz == 1 ? 1 : 0));
358+
jl_gc_count_allocd(nel*elsz);
349359
}
350360
else {
351361
a->flags.how = 0;
@@ -411,7 +421,7 @@ JL_DLLEXPORT jl_array_t *jl_ptr_to_array(jl_value_t *atype, void *data,
411421
if (own_buffer) {
412422
a->flags.how = 2;
413423
jl_gc_track_malloced_array(ct->ptls, a);
414-
jl_gc_count_allocd(nel*elsz + (elsz == 1 ? 1 : 0));
424+
jl_gc_count_allocd(nel*elsz);
415425
}
416426
else {
417427
a->flags.how = 0;
@@ -476,6 +486,8 @@ JL_DLLEXPORT jl_value_t *jl_array_to_string(jl_array_t *a)
476486
a->nrows = 0;
477487
a->length = 0;
478488
a->maxsize = 0;
489+
if (jl_string_data(o)[len] != '\0')
490+
jl_string_data(o)[len] = '\0';
479491
return o;
480492
}
481493
}
@@ -657,7 +669,7 @@ static int NOINLINE array_resize_buffer(jl_array_t *a, size_t newlen)
657669
size_t oldlen = a->nrows;
658670
int isbitsunion = jl_array_isbitsunion(a);
659671
assert(nbytes >= oldnbytes);
660-
if (elsz == 1 && !isbitsunion) {
672+
if (JL_ARRAY_MEMDEBUG_TERMINATOR && elsz == 1 && !isbitsunion) {
661673
nbytes++;
662674
oldnbytes++;
663675
}
@@ -676,11 +688,11 @@ static int NOINLINE array_resize_buffer(jl_array_t *a, size_t newlen)
676688
// if data is in a String, keep it that way
677689
jl_value_t *s;
678690
if (a->flags.isshared) {
679-
s = jl_alloc_string(nbytes - (elsz == 1));
691+
s = jl_alloc_string(nbytes - (JL_ARRAY_MEMDEBUG_TERMINATOR && elsz == 1));
680692
newbuf = 1;
681693
}
682694
else {
683-
s = jl_gc_realloc_string(jl_array_data_owner(a), nbytes - (elsz == 1));
695+
s = jl_gc_realloc_string(jl_array_data_owner(a), nbytes - (JL_ARRAY_MEMDEBUG_TERMINATOR && elsz == 1));
684696
}
685697
jl_array_data_owner(a) = s;
686698
jl_gc_wb(a, s);
@@ -700,8 +712,10 @@ static int NOINLINE array_resize_buffer(jl_array_t *a, size_t newlen)
700712
jl_gc_wb_buf(a, a->data, nbytes);
701713
}
702714
}
703-
if (JL_ARRAY_IMPL_NUL && elsz == 1 && !isbitsunion)
704-
memset((char*)a->data + oldnbytes - 1, 0, nbytes - oldnbytes + 1);
715+
if (JL_ARRAY_MEMDEBUG_TERMINATOR && elsz == 1 && !isbitsunion) {
716+
memset((char*)a->data + oldnbytes - 1, 0xfe, nbytes - oldnbytes + 1);
717+
msan_allocated_memory((char*)a->data + oldnbytes - 1, nbytes - oldnbytes + 1);
718+
}
705719
(void)oldlen;
706720
assert(oldlen == a->nrows &&
707721
"Race condition detected: recursive resizing on the same array.");
@@ -975,7 +989,7 @@ STATIC_INLINE void jl_array_shrink(jl_array_t *a, size_t dec)
975989
oldnbytes += a->maxsize;
976990
}
977991

978-
if (elsz == 1 && !isbitsunion) {
992+
if (JL_ARRAY_MEMDEBUG_TERMINATOR && elsz == 1 && !isbitsunion) {
979993
newbytes++;
980994
oldnbytes++;
981995
}
@@ -1055,8 +1069,8 @@ STATIC_INLINE void jl_array_del_at_beg(jl_array_t *a, size_t idx, size_t dec,
10551069
if (__unlikely(newoffs != offset) || idx > 0) {
10561070
char *olddata = (char*)a->data;
10571071
char *newdata = olddata - (a->offset - newoffs) * elsz;
1058-
char *typetagdata;
1059-
char *newtypetagdata;
1072+
char *typetagdata = NULL;
1073+
char *newtypetagdata = NULL;
10601074
if (isbitsunion) {
10611075
typetagdata = jl_array_typetagdata(a);
10621076
newtypetagdata = typetagdata - (a->offset - newoffs);
@@ -1065,7 +1079,7 @@ STATIC_INLINE void jl_array_del_at_beg(jl_array_t *a, size_t idx, size_t dec,
10651079
size_t nb1 = idx * elsz; // size in bytes of the first block
10661080
size_t nbtotal = a->nrows * elsz; // size in bytes of the new array
10671081
// Implicit '\0' for byte arrays
1068-
if (elsz == 1 && !isbitsunion)
1082+
if (JL_ARRAY_MEMDEBUG_TERMINATOR && elsz == 1 && !isbitsunion)
10691083
nbtotal++;
10701084
if (idx > 0) {
10711085
memmove_safe(a->flags.hasptr, newdata, olddata, nb1);
@@ -1102,8 +1116,10 @@ STATIC_INLINE void jl_array_del_at_end(jl_array_t *a, size_t idx, size_t dec,
11021116
}
11031117
}
11041118
n -= dec;
1105-
if (elsz == 1 && !isbitsunion)
1106-
data[n] = 0;
1119+
if (JL_ARRAY_MEMDEBUG_TERMINATOR && elsz == 1 && !isbitsunion) {
1120+
data[n] = 0xfe;
1121+
msan_allocated_memory(&data[n], 1);
1122+
}
11071123
a->nrows = n;
11081124
a->length = n;
11091125
}
@@ -1279,47 +1295,6 @@ JL_DLLEXPORT jl_value_t *(jl_array_data_owner)(jl_array_t *a) JL_NOTSAFEPOINT
12791295
return jl_array_data_owner(a);
12801296
}
12811297

1282-
STATIC_INLINE int jl_has_implicit_byte_owned(jl_array_t *a)
1283-
{
1284-
assert(a->flags.how != 3);
1285-
if (!a->flags.isshared)
1286-
return 1;
1287-
return a->flags.how == 1;
1288-
}
1289-
1290-
STATIC_INLINE int jl_has_implicit_byte(jl_array_t *a)
1291-
{
1292-
// * unshared:
1293-
// * how: 0-2
1294-
// We own and allocated the data.
1295-
// It should have the extra byte.
1296-
// * shared:
1297-
// * how: 0, 2
1298-
// The data might come from external source without implicit NUL byte.
1299-
// There could be an entra byte for a `reinterpreted` array
1300-
// but that should be unlikely for strings.
1301-
// * how: 1
1302-
// We allocated the data with the extra byte.
1303-
// * how: 3
1304-
// We should check the owner.
1305-
if (a->flags.how == 3) {
1306-
a = (jl_array_t*)jl_array_data_owner(a);
1307-
if (jl_is_string(a)) return 1;
1308-
return a->elsize == 1 && jl_has_implicit_byte_owned(a);
1309-
}
1310-
return jl_has_implicit_byte_owned(a);
1311-
}
1312-
1313-
// Create an array with the same content
1314-
JL_DLLEXPORT jl_array_t *jl_array_cconvert_cstring(jl_array_t *a)
1315-
{
1316-
assert(jl_typeof(a) == jl_array_uint8_type);
1317-
if (!jl_has_implicit_byte(a))
1318-
a = jl_array_copy(a);
1319-
((char*)a->data)[a->nrows] = 0;
1320-
return a;
1321-
}
1322-
13231298
#ifdef __cplusplus
13241299
}
13251300
#endif

src/jl_exported_funcs.inc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@
2525
XX(jl_arrayref) \
2626
XX(jl_arrayset) \
2727
XX(jl_arrayunset) \
28-
XX(jl_array_cconvert_cstring) \
2928
XX(jl_array_copy) \
3029
XX(jl_array_del_at) \
3130
XX(jl_array_del_beg) \

src/julia_internal.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1493,8 +1493,6 @@ STATIC_INLINE void *jl_get_frame_addr(void)
14931493
#endif
14941494
}
14951495

1496-
JL_DLLEXPORT jl_array_t *jl_array_cconvert_cstring(jl_array_t *a);
1497-
14981496
// Log `msg` to the current logger by calling CoreLogging.logmsg_shim() on the
14991497
// julia side. If any of module, group, id, file or line are NULL, these will
15001498
// be passed to the julia side as `nothing`. If `kwargs` is NULL an empty set

test/core.jl

Lines changed: 0 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -4554,48 +4554,6 @@ test_shared_array_resize(Int)
45544554
test_shared_array_resize(Any)
45554555
end
45564556

4557-
module TestArrayNUL
4558-
using Test
4559-
function check_nul(a::Vector{UInt8})
4560-
b = ccall(:jl_array_cconvert_cstring,
4561-
Ref{Vector{UInt8}}, (Vector{UInt8},), a)
4562-
@test unsafe_load(pointer(b), length(b) + 1) == 0x0
4563-
return b === a
4564-
end
4565-
4566-
a = UInt8[]
4567-
b = "aaa"
4568-
c = [0x2, 0x1, 0x3]
4569-
4570-
@test check_nul(a)
4571-
@test check_nul(unsafe_wrap(Vector{UInt8},b))
4572-
@test check_nul(c)
4573-
d = [0x2, 0x1, 0x3]
4574-
@test check_nul(d)
4575-
push!(d, 0x3)
4576-
@test check_nul(d)
4577-
push!(d, 0x3)
4578-
@test check_nul(d)
4579-
ccall(:jl_array_del_end, Cvoid, (Any, UInt), d, 2)
4580-
@test check_nul(d)
4581-
ccall(:jl_array_grow_end, Cvoid, (Any, UInt), d, 1)
4582-
@test check_nul(d)
4583-
ccall(:jl_array_grow_end, Cvoid, (Any, UInt), d, 1)
4584-
@test check_nul(d)
4585-
ccall(:jl_array_grow_end, Cvoid, (Any, UInt), d, 10)
4586-
@test check_nul(d)
4587-
ccall(:jl_array_del_beg, Cvoid, (Any, UInt), d, 8)
4588-
@test check_nul(d)
4589-
ccall(:jl_array_grow_beg, Cvoid, (Any, UInt), d, 8)
4590-
@test check_nul(d)
4591-
ccall(:jl_array_grow_beg, Cvoid, (Any, UInt), d, 8)
4592-
@test check_nul(d)
4593-
f = unsafe_wrap(Array, pointer(d), length(d))
4594-
@test !check_nul(f)
4595-
f = unsafe_wrap(Array, ccall(:malloc, Ptr{UInt8}, (Csize_t,), 10), 10, own = true)
4596-
@test !check_nul(f)
4597-
end
4598-
45994557
# Copy of `#undef`
46004558
copyto!(Vector{Any}(undef, 10), Vector{Any}(undef, 10))
46014559
function test_copy_alias(::Type{T}) where T

0 commit comments

Comments
 (0)