From ace5fb5728737679b0701d45bd277cf8057e9969 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Fri, 6 Jun 2025 14:35:39 +0000 Subject: [PATCH 1/2] add support for storing just the inferred inlining_cost In preparation for giving us the option of not storing code that does not seem useful immediately, but which we previously kept around just in case inference or codegen (always_inline) ever needed to decide if it was worthwhile to inline later. And also for investigating issues like the recently closed #58449 to examine whether the code was removed for either heuristic or correctness reasons. --- Compiler/src/optimize.jl | 4 +++- src/codegen.cpp | 2 +- src/gc-stock.h | 9 -------- src/gf.c | 2 +- src/ircode.c | 46 +++++++++++++++++++++++++++++++++++----- src/julia.h | 7 ++++-- src/precompile_utils.c | 2 +- src/staticdata.c | 20 +++++++++++------ 8 files changed, 65 insertions(+), 27 deletions(-) diff --git a/Compiler/src/optimize.jl b/Compiler/src/optimize.jl index 3532b2043d76f..da4a17c5d6913 100644 --- a/Compiler/src/optimize.jl +++ b/Compiler/src/optimize.jl @@ -113,7 +113,9 @@ set_inlineable!(src::CodeInfo, val::Bool) = function inline_cost_clamp(x::Int) x > MAX_INLINE_COST && return MAX_INLINE_COST x < MIN_INLINE_COST && return MIN_INLINE_COST - return convert(InlineCostType, x) + x = ccall(:jl_encode_inlining_cost, UInt8, (InlineCostType,), x) + x = ccall(:jl_decode_inlining_cost, InlineCostType, (UInt8,), x) + return x end const SRC_FLAG_DECLARED_INLINE = 0x1 diff --git a/src/codegen.cpp b/src/codegen.cpp index a736449813608..2a1f044345d08 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -9900,7 +9900,7 @@ void emit_always_inline(orc::ThreadSafeModule &result_m, jl_codegen_params_t &pa src = (jl_code_info_t*)jl_atomic_load_relaxed(&codeinst->inferred); jl_method_instance_t *mi = jl_get_ci_mi(codeinst); jl_method_t *def = mi->def.method; - if (src && (jl_value_t*)src != jl_nothing && jl_is_method(def) && jl_ir_inlining_cost((jl_value_t*)src) < UINT16_MAX) + if (src && jl_is_string((jl_value_t*)src) && jl_is_method(def) && jl_ir_inlining_cost((jl_value_t*)src) < UINT16_MAX) src = jl_uncompress_ir(def, codeinst, (jl_value_t*)src); if (src && jl_is_code_info(src) && jl_ir_inlining_cost((jl_value_t*)src) < UINT16_MAX) { jl_llvm_functions_t decls = jl_emit_codeinst(result_m, codeinst, src, params); // contains safepoints diff --git a/src/gc-stock.h b/src/gc-stock.h index d478ee1366da0..8e27893697f68 100644 --- a/src/gc-stock.h +++ b/src/gc-stock.h @@ -365,15 +365,6 @@ STATIC_INLINE jl_gc_pagemeta_t *pop_page_metadata_back(jl_gc_pagemeta_t **ppg) J return v; } -#ifdef __clang_gcanalyzer__ /* clang may not have __builtin_ffs */ -unsigned ffs_u32(uint32_t bitvec) JL_NOTSAFEPOINT; -#else -STATIC_INLINE unsigned ffs_u32(uint32_t bitvec) -{ - return __builtin_ffs(bitvec) - 1; -} -#endif - extern bigval_t *oldest_generation_of_bigvals; extern int64_t buffered_pages; extern int gc_first_tid; diff --git a/src/gf.c b/src/gf.c index 8205cf70b99c3..8ca5761ce56ff 100644 --- a/src/gf.c +++ b/src/gf.c @@ -357,7 +357,7 @@ static int emit_codeinst_and_edges(jl_code_instance_t *codeinst) JL_GC_PUSH1(&code); jl_method_instance_t *mi = jl_get_ci_mi(codeinst); jl_method_t *def = mi->def.method; - if (jl_is_string(code) && jl_is_method(def)) + if (jl_is_method(def)) code = (jl_value_t*)jl_uncompress_ir(def, codeinst, (jl_value_t*)code); if (jl_is_code_info(code)) { jl_emit_codeinst_to_jit(codeinst, (jl_code_info_t*)code); diff --git a/src/ircode.c b/src/ircode.c index 9a94c4c62431a..a79f8e900b5fd 100644 --- a/src/ircode.c +++ b/src/ircode.c @@ -989,7 +989,7 @@ static int codelocs_nstmts(jl_string_t *cl) JL_NOTSAFEPOINT #define IR_DATASIZE_FLAGS sizeof(uint16_t) #define IR_DATASIZE_PURITY sizeof(uint16_t) -#define IR_DATASIZE_INLINING_COST sizeof(uint16_t) +#define IR_DATASIZE_INLINING_COST sizeof(uint8_t) #define IR_DATASIZE_NSLOTS sizeof(int32_t) typedef enum { ir_offset_flags = 0, @@ -1044,7 +1044,7 @@ JL_DLLEXPORT jl_string_t *jl_compress_ir(jl_method_t *m, jl_code_info_t *code) code->ssaflags); write_uint16(s.s, checked_size(flags.packed, IR_DATASIZE_FLAGS)); write_uint16(s.s, checked_size(code->purity.bits, IR_DATASIZE_PURITY)); - write_uint16(s.s, checked_size(code->inlining_cost, IR_DATASIZE_INLINING_COST)); + write_uint8(s.s, checked_size(jl_encode_inlining_cost(code->inlining_cost), IR_DATASIZE_INLINING_COST)); size_t nslots = jl_array_nrows(code->slotflags); assert(nslots >= m->nargs && nslots < INT32_MAX); // required by generated functions @@ -1109,6 +1109,8 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t { if (jl_is_code_info(data)) return (jl_code_info_t*)data; + if (!jl_is_string(data)) + return (jl_code_info_t*)jl_nothing; JL_TIMING(AST_UNCOMPRESS, AST_UNCOMPRESS); JL_LOCK(&m->writelock); // protect the roots array (Might GC) assert(jl_is_method(m)); @@ -1139,7 +1141,7 @@ JL_DLLEXPORT jl_code_info_t *jl_uncompress_ir(jl_method_t *m, jl_code_instance_t code->nospecializeinfer = flags.bits.nospecializeinfer; code->isva = flags.bits.isva; code->purity.bits = read_uint16(s.s); - code->inlining_cost = read_uint16(s.s); + code->inlining_cost = jl_decode_inlining_cost(read_uint8(s.s)); size_t nslots = read_int32(s.s); code->slotflags = jl_alloc_array_1d(jl_array_uint8_type, nslots); @@ -1240,12 +1242,46 @@ JL_DLLEXPORT uint8_t jl_ir_flag_has_image_globalref(jl_string_t *data) return flags.bits.has_image_globalref; } -JL_DLLEXPORT uint16_t jl_ir_inlining_cost(jl_string_t *data) +// create a compressed u16 value with range 0..3968, 3 bits exponent, 5 bits mantissa, implicit first digit, rounding up, full accuracy over 0..63 +JL_DLLEXPORT uint8_t jl_encode_inlining_cost(uint16_t inlining_cost) { + unsigned shift = 0; + unsigned mantissa; + if (inlining_cost <= 0x1f) { + mantissa = inlining_cost; + } + else { + while (inlining_cost >> 5 >> shift != 0) + shift++; + assert(1 <= shift && shift <= 11); + mantissa = (inlining_cost >> (shift - 1)) & 0x1f; + mantissa += (inlining_cost & ((1 << (shift - 1)) - 1)) != 0; // round up if trailing bits non-zero, overflowing into exp + } + unsigned r = (shift << 5) + mantissa; + if (r > 0xff) + r = 0xff; + return r; +} + +JL_DLLEXPORT uint16_t jl_decode_inlining_cost(uint8_t inlining_cost) +{ + unsigned shift = inlining_cost >> 5; + if (inlining_cost == 0xff) + return 0xffff; + else if (shift == 0) + return inlining_cost; + else + return 0x20 | ((inlining_cost & 0x1f) << (shift - 1)); +} + +JL_DLLEXPORT uint16_t jl_ir_inlining_cost(jl_value_t *data) +{ + if (jl_is_uint8(data)) + return jl_decode_inlining_cost(*(uint8_t*)data); if (jl_is_code_info(data)) return ((jl_code_info_t*)data)->inlining_cost; assert(jl_is_string(data)); - uint16_t res = jl_load_unaligned_i16(jl_string_data(data) + ir_offset_inlining_cost); + uint16_t res = jl_decode_inlining_cost(*(uint8_t*)(jl_string_data(data) + ir_offset_inlining_cost)); return res; } diff --git a/src/julia.h b/src/julia.h index 6c1c8af0a788b..12389dd13a9ff 100644 --- a/src/julia.h +++ b/src/julia.h @@ -438,10 +438,11 @@ typedef struct _jl_code_instance_t { jl_value_t *rettype_const; // inferred constant return value, or null // Inferred result. When part of the runtime cache, either - // - A jl_code_info_t (may be compressed) containing the inferred IR + // - A jl_code_info_t (may be compressed as a String) containing the inferred IR // - jl_nothing, indicating that inference was completed, but the result was // deleted to save space. - // - null, indicating that inference was not yet completed or did not succeed + // - UInt8, indicating that inference recorded the estimated inlining cost, but deleted the result to save space + // - NULL, indicating that inference was not yet completed or did not succeed _Atomic(jl_value_t *) inferred; _Atomic(jl_debuginfo_t *) debuginfo; // stored information about edges from this object (set once, with a happens-before both source and invoke) _Atomic(jl_svec_t *) edges; // forward edge info @@ -2310,6 +2311,8 @@ JL_DLLEXPORT jl_value_t *jl_uncompress_argname_n(jl_value_t *syms, size_t i); JL_DLLEXPORT struct jl_codeloc_t jl_uncompress1_codeloc(jl_value_t *cl, size_t pc) JL_NOTSAFEPOINT; JL_DLLEXPORT jl_value_t *jl_compress_codelocs(int32_t firstline, jl_value_t *codelocs, size_t nstmts); JL_DLLEXPORT jl_value_t *jl_uncompress_codelocs(jl_value_t *cl, size_t nstmts); +JL_DLLEXPORT uint8_t jl_encode_inlining_cost(uint16_t inlining_cost) JL_NOTSAFEPOINT; +JL_DLLEXPORT uint16_t jl_decode_inlining_cost(uint8_t inlining_cost) JL_NOTSAFEPOINT; JL_DLLEXPORT int jl_is_operator(const char *sym); JL_DLLEXPORT int jl_is_unary_operator(const char *sym); diff --git a/src/precompile_utils.c b/src/precompile_utils.c index 86bb723443925..491f111ac4746 100644 --- a/src/precompile_utils.c +++ b/src/precompile_utils.c @@ -208,7 +208,7 @@ static int precompile_enq_specialization_(jl_method_instance_t *mi, void *closur jl_value_t *inferred = jl_atomic_load_relaxed(&codeinst->inferred); if (inferred && (jl_options.compile_enabled == JL_OPTIONS_COMPILE_ALL || inferred == jl_nothing || - ((jl_is_string(inferred) || jl_is_code_info(inferred)) && jl_ir_inlining_cost(inferred) == UINT16_MAX))) { + ((jl_is_string(inferred) || jl_is_code_info(inferred) || jl_is_uint8(inferred)) && jl_ir_inlining_cost(inferred) == UINT16_MAX))) { do_compile = 1; } else if (jl_atomic_load_relaxed(&codeinst->invoke) != NULL || jl_atomic_load_relaxed(&codeinst->precompile)) { diff --git a/src/staticdata.c b/src/staticdata.c index 92e7f494ad35d..6038b43f1c75d 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -901,30 +901,36 @@ static void jl_insert_into_serialization_queue(jl_serializer_state *s, jl_value_ } } jl_value_t *inferred = jl_atomic_load_relaxed(&ci->inferred); - if (inferred && inferred != jl_nothing) { // disregard if there is nothing here to delete (e.g. builtins, unspecialized) + if (inferred && inferred != jl_nothing && !jl_is_uint8(inferred)) { // disregard if there is nothing here to delete (e.g. builtins, unspecialized) jl_method_t *def = mi->def.method; if (jl_is_method(def)) { // don't delete toplevel code int is_relocatable = !s->incremental || jl_is_code_info(inferred) || (jl_is_string(inferred) && jl_string_len(inferred) > 0 && jl_string_data(inferred)[jl_string_len(inferred) - 1]); + int discard = 0; if (!is_relocatable) { - inferred = jl_nothing; + discard = 1; } else if (def->source == NULL) { // don't delete code from optimized opaque closures that can't be reconstructed (and builtins) } else if (jl_atomic_load_relaxed(&ci->max_world) != ~(size_t)0 || // delete all code that cannot run jl_atomic_load_relaxed(&ci->invoke) == jl_fptr_const_return) { // delete all code that just returns a constant - inferred = jl_nothing; + discard = 1; } else if (native_functions && // don't delete any code if making a ji file (ci->owner == jl_nothing) && // don't delete code for external interpreters !effects_foldable(jl_atomic_load_relaxed(&ci->ipo_purity_bits)) && // don't delete code we may want for irinterp jl_ir_inlining_cost(inferred) == UINT16_MAX) { // don't delete inlineable code // delete the code now: if we thought it was worth keeping, it would have been converted to object code - inferred = jl_nothing; + discard = 1; } - if (inferred == jl_nothing) { - record_field_change((jl_value_t**)&ci->inferred, jl_nothing); + if (discard) { + // keep only the inlining cost, so inference can later decide if it is worth getting the source back + if (jl_is_string(inferred) || jl_is_code_info(inferred)) + inferred = jl_box_uint8(jl_encode_inlining_cost(jl_ir_inlining_cost(inferred))); + else + inferred = jl_nothing; + record_field_change((jl_value_t**)&ci->inferred, inferred); } else if (s->incremental && jl_is_string(inferred)) { // New roots for external methods @@ -2687,7 +2693,7 @@ static void strip_specializations_(jl_method_instance_t *mi) jl_code_instance_t *codeinst = jl_atomic_load_relaxed(&mi->cache); while (codeinst) { jl_value_t *inferred = jl_atomic_load_relaxed(&codeinst->inferred); - if (inferred && inferred != jl_nothing) { + if (inferred && inferred != jl_nothing && !jl_is_uint8(inferred)) { if (jl_options.strip_ir) { record_field_change((jl_value_t**)&codeinst->inferred, jl_nothing); } From 42b34efacdd3f76f5e824a949d19f80833b1e4fa Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Mon, 9 Jun 2025 16:31:43 -0400 Subject: [PATCH 2/2] Update src/ircode.c Co-authored-by: Justin Willmert <2965436+jmert@users.noreply.github.com> --- src/ircode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ircode.c b/src/ircode.c index a79f8e900b5fd..65130e46edfe0 100644 --- a/src/ircode.c +++ b/src/ircode.c @@ -1271,7 +1271,7 @@ JL_DLLEXPORT uint16_t jl_decode_inlining_cost(uint8_t inlining_cost) else if (shift == 0) return inlining_cost; else - return 0x20 | ((inlining_cost & 0x1f) << (shift - 1)); + return (0x20 | (inlining_cost & 0x1f)) << (shift - 1); } JL_DLLEXPORT uint16_t jl_ir_inlining_cost(jl_value_t *data)