Skip to content

Commit 197785d

Browse files
authored
Merge pull request #24751 from JuliaLang/jn/runtime_intrinsics_less_alloc
runtime-intrinsics: reduce allocations for common types
2 parents 7198ddf + 3fee8a3 commit 197785d

File tree

2 files changed

+42
-45
lines changed

2 files changed

+42
-45
lines changed

src/datatype.c

Lines changed: 12 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -486,21 +486,24 @@ JL_DLLEXPORT jl_datatype_t *jl_new_primitivetype(jl_value_t *name, jl_module_t *
486486

487487
JL_DLLEXPORT jl_value_t *jl_new_bits(jl_value_t *dt, void *data)
488488
{
489-
// data may not have the alignment required by the data type.
489+
// data may not have the alignment required by the size
490+
// but will always have the alignment required by the datatype
490491
jl_ptls_t ptls = jl_get_ptls_states();
491492
assert(jl_is_datatype(dt));
492493
jl_datatype_t *bt = (jl_datatype_t*)dt;
493494
size_t nb = jl_datatype_size(bt);
495+
// some types have special pools to minimize allocations
494496
if (nb == 0) return jl_new_struct_uninit(bt); // returns bt->instance
497+
if (bt == jl_bool_type) return (1 & *(int8_t*)data) ? jl_true : jl_false;
495498
if (bt == jl_uint8_type) return jl_box_uint8(*(uint8_t*)data);
496-
if (bt == jl_int64_type) return jl_box_int64(jl_load_unaligned_i64(data));
497-
if (bt == jl_bool_type) return (*(int8_t*)data) ? jl_true : jl_false;
498-
if (bt == jl_int32_type) return jl_box_int32(jl_load_unaligned_i32(data));
499-
if (bt == jl_float64_type) {
500-
double f;
501-
memcpy(&f, data, 8);
502-
return jl_box_float64(f);
503-
}
499+
if (bt == jl_int64_type) return jl_box_int64(*(int64_t*)data);
500+
if (bt == jl_int32_type) return jl_box_int32(*(int32_t*)data);
501+
if (bt == jl_int8_type) return jl_box_int8(*(int8_t*)data);
502+
if (bt == jl_int16_type) return jl_box_int16(*(int16_t*)data);
503+
if (bt == jl_uint64_type) return jl_box_uint64(*(uint64_t*)data);
504+
if (bt == jl_uint32_type) return jl_box_uint32(*(uint32_t*)data);
505+
if (bt == jl_uint16_type) return jl_box_uint16(*(uint16_t*)data);
506+
if (bt == jl_char_type) return jl_box_char(*(uint32_t*)data);
504507

505508
jl_value_t *v = jl_gc_alloc(ptls, nb, bt);
506509
switch (nb) {

src/runtime_intrinsics.c

Lines changed: 30 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -349,27 +349,26 @@ jl_value_t *jl_iintrinsic_1(jl_value_t *ty, jl_value_t *a, const char *name,
349349

350350
static inline jl_value_t *jl_intrinsiclambda_ty1(jl_value_t *ty, void *pa, unsigned osize, unsigned osize2, const void *voidlist)
351351
{
352-
jl_ptls_t ptls = jl_get_ptls_states();
353-
jl_value_t *newv = jl_gc_alloc(ptls, jl_datatype_size(ty), ty);
354352
intrinsic_1_t op = select_intrinsic_1(osize2, (const intrinsic_1_t*)voidlist);
355-
op(osize * host_char_bit, pa, jl_data_ptr(newv));
356-
return newv;
353+
void *pr = alloca(osize2);
354+
op(osize * host_char_bit, pa, pr);
355+
return jl_new_bits(ty, pr);
357356
}
358357

359358
static inline jl_value_t *jl_intrinsiclambda_u1(jl_value_t *ty, void *pa, unsigned osize, unsigned osize2, const void *voidlist)
360359
{
361360
jl_ptls_t ptls = jl_get_ptls_states();
362-
jl_value_t *newv = jl_gc_alloc(ptls, jl_datatype_size(ty), ty);
363361
intrinsic_u1_t op = select_intrinsic_u1(osize2, (const intrinsic_u1_t*)voidlist);
364-
unsigned cnt = op(osize * host_char_bit, pa);
365-
// TODO: the following memset/memcpy assumes little-endian
362+
uint64_t cnt = op(osize * host_char_bit, pa);
363+
// TODO: the following assume little-endian
366364
// for big-endian, need to copy from the other end of cnt
367-
if (osize > sizeof(unsigned)) {
368-
// perform zext, if needed
369-
memset((char*)jl_data_ptr(newv) + sizeof(unsigned), 0, osize - sizeof(unsigned));
370-
osize = sizeof(unsigned);
365+
if (osize <= sizeof(cnt)) {
366+
return jl_new_bits(ty, &cnt);
371367
}
372-
memcpy(jl_data_ptr(newv), &cnt, osize);
368+
jl_value_t *newv = jl_gc_alloc(ptls, osize, ty);
369+
// perform zext, if needed
370+
memset((char*)jl_data_ptr(newv) + sizeof(cnt), 0, osize - sizeof(cnt));
371+
memcpy(jl_data_ptr(newv), &cnt, sizeof(cnt));
373372
return newv;
374373
}
375374

@@ -385,7 +384,6 @@ JL_DLLEXPORT jl_value_t *jl_##name(jl_value_t *ty, jl_value_t *a) \
385384

386385
static inline jl_value_t *jl_intrinsic_cvt(jl_value_t *ty, jl_value_t *a, const char *name, intrinsic_cvt_t op)
387386
{
388-
jl_ptls_t ptls = jl_get_ptls_states();
389387
jl_value_t *aty = jl_typeof(a);
390388
if (!jl_is_primitivetype(aty))
391389
jl_errorf("%s: value is not a primitive type", name);
@@ -394,12 +392,13 @@ static inline jl_value_t *jl_intrinsic_cvt(jl_value_t *ty, jl_value_t *a, const
394392
void *pa = jl_data_ptr(a);
395393
unsigned isize = jl_datatype_size(aty);
396394
unsigned osize = jl_datatype_size(ty);
397-
jl_value_t *newv = jl_gc_alloc(ptls, jl_datatype_size(ty), ty);
398-
op(aty == (jl_value_t*)jl_bool_type ? 1 : isize * host_char_bit, pa,
399-
osize * host_char_bit, jl_data_ptr(newv));
400-
if (ty == (jl_value_t*)jl_bool_type)
401-
return *(uint8_t*)jl_data_ptr(newv) & 1 ? jl_true : jl_false;
402-
return newv;
395+
void *pr = alloca(osize);
396+
unsigned isize_bits = isize * host_char_bit;
397+
unsigned osize_bits = osize * host_char_bit;
398+
if (aty == (jl_value_t*)jl_bool_type)
399+
isize_bits = 1;
400+
op(isize_bits, pa, osize_bits, pr);
401+
return jl_new_bits(ty, pr);
403402
}
404403

405404
// floating point
@@ -544,7 +543,7 @@ jl_value_t *jl_iintrinsic_2(jl_value_t *a, jl_value_t *b, const char *name,
544543
void *pa = jl_data_ptr(a), *pb = jl_data_ptr(b);
545544
unsigned sz = jl_datatype_size(ty);
546545
unsigned sz2 = next_power_of_two(sz);
547-
unsigned szb = jl_datatype_size(tyb);
546+
unsigned szb = cvtb ? jl_datatype_size(tyb) : sz;
548547
if (sz2 > sz) {
549548
/* round type up to the appropriate c-type and set/clear the unused bits */
550549
void *pa2 = alloca(sz2);
@@ -553,10 +552,12 @@ jl_value_t *jl_iintrinsic_2(jl_value_t *a, jl_value_t *b, const char *name,
553552
pa = pa2;
554553
}
555554
if (sz2 > szb) {
556-
/* round type up to the appropriate c-type and set/clear/truncate the unused bits */
555+
/* round type up to the appropriate c-type and set/clear/truncate the unused bits
556+
* (zero-extend if cvtb is set, since in that case b is unsigned while the sign of a comes from the op)
557+
*/
557558
void *pb2 = alloca(sz2);
558559
memcpy(pb2, pb, szb);
559-
memset((char*)pb2 + szb, getsign(pb, sz), sz2 - szb);
560+
memset((char*)pb2 + szb, cvtb ? 0 : getsign(pb, szb), sz2 - szb);
560561
pb = pb2;
561562
}
562563
jl_value_t *newv = lambda2(ty, pa, pb, sz, sz2, list);
@@ -565,13 +566,10 @@ jl_value_t *jl_iintrinsic_2(jl_value_t *a, jl_value_t *b, const char *name,
565566

566567
static inline jl_value_t *jl_intrinsiclambda_2(jl_value_t *ty, void *pa, void *pb, unsigned sz, unsigned sz2, const void *voidlist)
567568
{
568-
jl_ptls_t ptls = jl_get_ptls_states();
569-
jl_value_t *newv = jl_gc_alloc(ptls, jl_datatype_size(ty), ty);
569+
void *pr = alloca(sz2);
570570
intrinsic_2_t op = select_intrinsic_2(sz2, (const intrinsic_2_t*)voidlist);
571-
op(sz * host_char_bit, pa, pb, jl_data_ptr(newv));
572-
if (ty == (jl_value_t*)jl_bool_type)
573-
return *(uint8_t*)jl_data_ptr(newv) & 1 ? jl_true : jl_false;
574-
return newv;
571+
op(sz * host_char_bit, pa, pb, pr);
572+
return jl_new_bits(ty, pr);
575573
}
576574

577575
static inline jl_value_t *jl_intrinsiclambda_cmp(jl_value_t *ty, void *pa, void *pb, unsigned sz, unsigned sz2, const void *voidlist)
@@ -586,7 +584,7 @@ static inline jl_value_t *jl_intrinsiclambda_checked(jl_value_t *ty, void *pa, v
586584
jl_value_t *params[2];
587585
params[0] = ty;
588586
params[1] = (jl_value_t*)jl_bool_type;
589-
jl_datatype_t *tuptyp = jl_apply_tuple_type_v(params,2);
587+
jl_datatype_t *tuptyp = jl_apply_tuple_type_v(params, 2);
590588
jl_ptls_t ptls = jl_get_ptls_states();
591589
jl_value_t *newv = jl_gc_alloc(ptls, ((jl_datatype_t*)tuptyp)->size, tuptyp);
592590

@@ -599,16 +597,12 @@ static inline jl_value_t *jl_intrinsiclambda_checked(jl_value_t *ty, void *pa, v
599597
}
600598
static inline jl_value_t *jl_intrinsiclambda_checkeddiv(jl_value_t *ty, void *pa, void *pb, unsigned sz, unsigned sz2, const void *voidlist)
601599
{
602-
jl_ptls_t ptls = jl_get_ptls_states();
603-
jl_value_t *newv = jl_gc_alloc(ptls, jl_datatype_size(ty), ty);
600+
void *pr = alloca(sz2);
604601
intrinsic_checked_t op = select_intrinsic_checked(sz2, (const intrinsic_checked_t*)voidlist);
605-
int ovflw = op(sz * host_char_bit, pa, pb, jl_data_ptr(newv));
602+
int ovflw = op(sz * host_char_bit, pa, pb, pr);
606603
if (ovflw)
607604
jl_throw(jl_diverror_exception);
608-
if (ty == (jl_value_t*)jl_bool_type)
609-
return *(uint8_t*)jl_data_ptr(newv) & 1 ? jl_true : jl_false;
610-
611-
return newv;
605+
return jl_new_bits(ty, pr);
612606
}
613607

614608
// floating point

0 commit comments

Comments
 (0)