From 653c41ad789116df0df10ddef583a12cac39b2b3 Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Thu, 15 Aug 2024 05:55:12 +0000 Subject: [PATCH 01/11] Only pin owners, not all generic memory objects --- src/genericmemory.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/genericmemory.c b/src/genericmemory.c index e435ec3b63c9f..68a8585b4ae47 100644 --- a/src/genericmemory.c +++ b/src/genericmemory.c @@ -166,6 +166,7 @@ JL_DLLEXPORT jl_genericmemory_t *jl_ptr_to_genericmemory(jl_value_t *mtype, void m->length = nel; jl_genericmemory_data_owner_field(m) = own_buffer ? (jl_value_t*)m : NULL; if (own_buffer) { + PTR_PIN(m); int isaligned = 0; // TODO: allow passing memalign'd buffers jl_gc_track_malloced_genericmemory(ct->ptls, m, isaligned); size_t allocated_bytes = memory_block_usable_size(data, isaligned); From 90e68f296e491eaa5defcf558015e9b865ea6610 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Thu, 7 Nov 2024 19:00:51 +1300 Subject: [PATCH 02/11] Adding macros to push pinning roots - port PR #43 (#71) Port #43 to dev. Add _NO_TPIN macros for pushing GC frames. 
--- src/interpreter.c | 7 +++++++ src/julia.h | 33 +++++++++++++++++++++++++++++++++ src/subtype.c | 6 +++--- 3 files changed, 43 insertions(+), 3 deletions(-) diff --git a/src/interpreter.c b/src/interpreter.c index 35c70a9ead2f1..305f174ad7f0b 100644 --- a/src/interpreter.c +++ b/src/interpreter.c @@ -52,7 +52,14 @@ extern void JL_GC_ENABLEFRAME(interpreter_state*) JL_NOTSAFEPOINT; #else +#ifdef MMTK_GC +#define JL_GC_ENCODE_PUSHFRAME(n) ((((size_t)(n))<<3)|2) +// For roots that are not transitively pinned +#define JL_GC_ENCODE_PUSHFRAME_NO_TPIN(n) ((((size_t)(n))<<3)|6) +#else #define JL_GC_ENCODE_PUSHFRAME(n) ((((size_t)(n))<<2)|2) +#define JL_GC_ENCODE_PUSHFRAME_NO_TPIN(n) JL_GC_ENCODE_PUSHFRAME(n) +#endif #define JL_GC_PUSHFRAME(frame,locals,n) \ JL_CPPALLOCA(frame, sizeof(*frame)+(((n)+3)*sizeof(jl_value_t*))); \ diff --git a/src/julia.h b/src/julia.h index a80a69049ccb2..9516475ea0521 100644 --- a/src/julia.h +++ b/src/julia.h @@ -1049,8 +1049,41 @@ struct _jl_gcframe_t { #define jl_pgcstack (jl_current_task->gcstack) +#ifndef MMTK_GC #define JL_GC_ENCODE_PUSHARGS(n) (((size_t)(n))<<2) #define JL_GC_ENCODE_PUSH(n) ((((size_t)(n))<<2)|1) +#define JL_GC_DECODE_NROOTS(n) (n >> 2) + +#define JL_GC_ENCODE_PUSHARGS_NO_TPIN(n) JL_GC_ENCODE_PUSHARGS(n) +#define JL_GC_ENCODE_PUSH_NO_TPIN(n) JL_GC_ENCODE_PUSH(n) +#else + +// We use an extra bit (100) in the nroots value from the frame to indicate that the roots +// in the frame are/are not transitively pinning. +// There are currently 3 macros that encode passing nroots to the gcframe +// and they use the two lowest bits to encode information about what is in the frame (as below). +// To support the distinction between transitively pinning roots and non transitively pinning roots +// on the stack, we take another bit from nroots to encode information about whether or not to +// transitively pin the roots in the frame. 
+// +// So the ones that transitively pin look like: +// #define JL_GC_ENCODE_PUSHARGS(n) (((size_t)(n))<<3) +// #define JL_GC_ENCODE_PUSH(n) ((((size_t)(n))<<3)|1) +// #define JL_GC_ENCODE_PUSHFRAME(n) ((((size_t)(n))<<3)|2) +// and the ones that do not look like: +// #define JL_GC_ENCODE_PUSHARGS_NO_TPIN(n) (((size_t)(n))<<3|4) +// #define JL_GC_ENCODE_PUSH_NO_TPIN(n) ((((size_t)(n))<<3)|5) +// #define JL_GC_ENCODE_PUSHFRAME_NO_TPIN(n) ((((size_t)(n))<<3)|6) + +// these are transitively pinning +#define JL_GC_ENCODE_PUSHARGS(n) (((size_t)(n))<<3) +#define JL_GC_ENCODE_PUSH(n) ((((size_t)(n))<<3)|1) +#define JL_GC_DECODE_NROOTS(n) (n >> 3) + +// these only pin the root object itself +#define JL_GC_ENCODE_PUSHARGS_NO_TPIN(n) (((size_t)(n))<<3|4) +#define JL_GC_ENCODE_PUSH_NO_TPIN(n) ((((size_t)(n))<<3)|5) +#endif #ifdef __clang_gcanalyzer__ diff --git a/src/subtype.c b/src/subtype.c index a0b7bff4006ce..5cb779fc0be8a 100644 --- a/src/subtype.c +++ b/src/subtype.c @@ -274,7 +274,7 @@ static void re_save_env(jl_stenv_t *e, jl_savedenv_t *se, int root) } else { roots = se->roots; - nroots = se->gcframe.nroots >> 2; + nroots = JL_GC_DECODE_NROOTS(se->gcframe.nroots); } } jl_varbinding_t *v = e->vars; @@ -367,7 +367,7 @@ static void restore_env(jl_stenv_t *e, jl_savedenv_t *se, int root) JL_NOTSAFEPO } else { roots = se->roots; - nroots = se->gcframe.nroots >> 2; + nroots = JL_GC_DECODE_NROOTS(se->gcframe.nroots); } } jl_varbinding_t *v = e->vars; @@ -4193,7 +4193,7 @@ static int merge_env(jl_stenv_t *e, jl_savedenv_t *me, jl_savedenv_t *se, int co else { saved = se->roots; merged = me->roots; - nroots = se->gcframe.nroots >> 2; + nroots = JL_GC_DECODE_NROOTS(se->gcframe.nroots); } assert(nroots == current_env_length(e) * 3); assert(nroots % 3 == 0); From 16c764aee46aca1c01c3bead0df332f07178421b Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Fri, 29 Nov 2024 12:22:51 +1300 Subject: [PATCH 03/11] Add GC preserve hook (#70) This PR ports https://github.com/mmtk/julia/pull/58 
to `dev`. This PR is mostly the same as https://github.com/mmtk/julia/pull/58 except that 1. this PR does not remove transitive pinning of shadow stack roots (we know it is unsound to remove the transitive pinning at this stage), and 2. this PR includes minor refactoring for GC codegen interface. --- src/gc-interface.h | 7 +++++ src/gc-mmtk.c | 47 ++++++++++++++++++++++++++++ src/gc-stock.c | 15 +++++++++ src/genericmemory.c | 2 +- src/jl_exported_funcs.inc | 2 ++ src/julia_threads.h | 3 ++ src/llvm-final-gc-lowering.cpp | 2 ++ src/llvm-gc-interface-passes.h | 9 ++++++ src/llvm-late-gc-lowering-mmtk.cpp | 46 +++++++++++++++++++++++++++ src/llvm-late-gc-lowering-stock.cpp | 4 +++ src/llvm-late-gc-lowering.cpp | 14 +++------ src/llvm-pass-helpers.cpp | 48 +++++++++++++++++++++++++++++ src/llvm-pass-helpers.h | 6 ++++ 13 files changed, 194 insertions(+), 11 deletions(-) diff --git a/src/gc-interface.h b/src/gc-interface.h index 826e91355b17a..3e5c8d64dda20 100644 --- a/src/gc-interface.h +++ b/src/gc-interface.h @@ -109,6 +109,13 @@ JL_DLLEXPORT const char* jl_gc_active_impl(void); // It still needs to be annotated with JL_DLLEXPORT since it is called from Rust by MMTk. JL_DLLEXPORT void jl_gc_sweep_stack_pools_and_mtarraylist_buffers(jl_ptls_t ptls) JL_NOTSAFEPOINT; +// TODO: The preserve hook functions may be temporary. We should see the performance impact of the change. + +// Runtime hook for gc preserve begin. The GC needs to make sure that the preserved objects and their children stay alive and won't move. +JL_DLLEXPORT void jl_gc_preserve_begin_hook(int n, ...) JL_NOTSAFEPOINT; +// Runtime hook for gc preserve end. Closes the most recently opened preserve region, releasing the roots that the matching begin hook registered. 
+JL_DLLEXPORT void jl_gc_preserve_end_hook(void) JL_NOTSAFEPOINT; + // ========================================================================= // // Metrics // ========================================================================= // diff --git a/src/gc-mmtk.c b/src/gc-mmtk.c index 2f261a2e8e2fd..da1b56e107403 100644 --- a/src/gc-mmtk.c +++ b/src/gc-mmtk.c @@ -1208,6 +1208,53 @@ JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) return NULL; } +#define jl_p_gcpreserve_stack (jl_current_task->gcpreserve_stack) + +// This macro currently uses malloc instead of alloca because this function will exit +// after pushing the roots into the gc_preserve_stack, which means that the preserve_begin function's +// stack frame will be destroyed (together with its alloca variables). When we support lowering this code +// inside the same function that is doing the preserve_begin/preserve_end calls we should be able to simple use allocas. +// Note also that we use a separate stack for gc preserve roots to avoid the possibility of calling free +// on a stack that has been allocated with alloca instead of malloc, which could happen depending on the order in which +// JL_GC_POP() and jl_gc_preserve_end_hook() occurs. + +#define JL_GC_PUSHARGS_PRESERVE_ROOT_OBJS(rts_var,n) \ + rts_var = ((jl_value_t**)malloc(((n)+2)*sizeof(jl_value_t*)))+2; \ + ((void**)rts_var)[-2] = (void*)JL_GC_ENCODE_PUSHARGS(n); \ + ((void**)rts_var)[-1] = jl_p_gcpreserve_stack; \ + memset((void*)rts_var, 0, (n)*sizeof(jl_value_t*)); \ + jl_p_gcpreserve_stack = (jl_gcframe_t*)&(((void**)rts_var)[-2]); \ + +#define JL_GC_POP_PRESERVE_ROOT_OBJS() \ + jl_gcframe_t *curr = jl_p_gcpreserve_stack; \ + if(curr) { \ + (jl_p_gcpreserve_stack = jl_p_gcpreserve_stack->prev); \ + free(curr); \ + } + +// Add each argument as a tpin root object. +// However, we cannot use JL_GC_PUSH and JL_GC_POP since the slots should live +// beyond this function. 
Instead, we maintain a tpin stack by mallocing/freeing +// the frames for each of the preserve regions we encounter +JL_DLLEXPORT void jl_gc_preserve_begin_hook(int n, ...) JL_NOTSAFEPOINT +{ + jl_value_t** frame; + JL_GC_PUSHARGS_PRESERVE_ROOT_OBJS(frame, n); + if (n == 0) return; + + va_list args; + va_start(args, n); + for (int i = 0; i < n; i++) { + frame[i] = va_arg(args, jl_value_t *); + } + va_end(args); +} + +JL_DLLEXPORT void jl_gc_preserve_end_hook(void) JL_NOTSAFEPOINT +{ + JL_GC_POP_PRESERVE_ROOT_OBJS(); +} + #ifdef __cplusplus } #endif diff --git a/src/gc-stock.c b/src/gc-stock.c index 8118b3c5629ae..6d8835ec32471 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -4080,6 +4080,21 @@ JL_DLLEXPORT const char* jl_gc_active_impl(void) { return "Built with stock GC"; } +void jl_gc_notify_image_alloc(const char* img_data, size_t len) +{ + // Do nothing +} + +JL_DLLEXPORT void jl_gc_preserve_begin_hook(int n, ...) JL_NOTSAFEPOINT +{ + jl_unreachable(); +} + +JL_DLLEXPORT void jl_gc_preserve_end_hook(void) JL_NOTSAFEPOINT +{ + jl_unreachable(); +} + #ifdef __cplusplus } #endif diff --git a/src/genericmemory.c b/src/genericmemory.c index 68a8585b4ae47..4180f8f58ebdd 100644 --- a/src/genericmemory.c +++ b/src/genericmemory.c @@ -166,7 +166,7 @@ JL_DLLEXPORT jl_genericmemory_t *jl_ptr_to_genericmemory(jl_value_t *mtype, void m->length = nel; jl_genericmemory_data_owner_field(m) = own_buffer ? 
(jl_value_t*)m : NULL; if (own_buffer) { - PTR_PIN(m); + // FIXME: PTR_PIN(m); int isaligned = 0; // TODO: allow passing memalign'd buffers jl_gc_track_malloced_genericmemory(ct->ptls, m, isaligned); size_t allocated_bytes = memory_block_usable_size(data, isaligned); diff --git a/src/jl_exported_funcs.inc b/src/jl_exported_funcs.inc index 4d1ab94644e39..9c7f97b01e7eb 100644 --- a/src/jl_exported_funcs.inc +++ b/src/jl_exported_funcs.inc @@ -181,6 +181,8 @@ XX(jl_gc_set_max_memory) \ XX(jl_gc_sync_total_bytes) \ XX(jl_gc_total_hrtime) \ + XX(jl_gc_preserve_begin_hook) \ + XX(jl_gc_preserve_end_hook) \ XX(jl_gdblookup) \ XX(jl_generating_output) \ XX(jl_declare_const_gf) \ diff --git a/src/julia_threads.h b/src/julia_threads.h index 061eb9266e7a7..427c3fda826cb 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -274,6 +274,9 @@ typedef struct _jl_task_t { // uint48_t padding2_64; // saved gc stack top for context switches jl_gcframe_t *gcstack; + // GC stack of objects from gc preserve regions + // These must always be transitively pinned. Only used by MMTK. + jl_gcframe_t *gcpreserve_stack; size_t world_age; // quick lookup for current ptls jl_ptls_t ptls; // == jl_all_tls_states[tid] diff --git a/src/llvm-final-gc-lowering.cpp b/src/llvm-final-gc-lowering.cpp index 76dcd944890ab..8c3eda208aa6f 100644 --- a/src/llvm-final-gc-lowering.cpp +++ b/src/llvm-final-gc-lowering.cpp @@ -42,6 +42,8 @@ void FinalLowerGC::lowerPushGCFrame(CallInst *target, Function &F) IRBuilder<> builder(target); StoreInst *inst = builder.CreateAlignedStore( + // FIXME: We should use JL_GC_ENCODE_PUSHARGS_NO_TPIN here. + // We need to make sure things are properly pinned before turning this into a non TPIN push. 
ConstantInt::get(T_size, JL_GC_ENCODE_PUSHARGS(nRoots)), builder.CreateConstInBoundsGEP1_32(T_prjlvalue, gcframe, 0, "frame.nroots"),// GEP of 0 becomes a noop and eats the name Align(sizeof(void*))); diff --git a/src/llvm-gc-interface-passes.h b/src/llvm-gc-interface-passes.h index 7b2a4bb033203..aae2e99c6a383 100644 --- a/src/llvm-gc-interface-passes.h +++ b/src/llvm-gc-interface-passes.h @@ -361,6 +361,7 @@ struct LateLowerGCFrame: private JuliaPassContext { void PlaceGCFrameReset(State &S, unsigned R, unsigned MinColorRoot, ArrayRef Colors, Value *GCFrame, Instruction *InsertBefore); void PlaceRootsAndUpdateCalls(ArrayRef Colors, int PreAssignedColors, State &S, std::map>); void CleanupWriteBarriers(Function &F, State *S, const SmallVector &WriteBarriers, bool *CFGModified); + void CleanupGCPreserve(Function &F, CallInst *CI, Value *callee, Type *T_size); bool CleanupIR(Function &F, State *S, bool *CFGModified); void NoteUseChain(State &S, BBState &BBS, User *TheUser); SmallVector GetPHIRefinements(PHINode *phi, State &S); @@ -413,4 +414,12 @@ struct FinalLowerGC: private JuliaPassContext { void lowerSafepoint(CallInst *target, Function &F); }; +inline bool isSpecialPtr(Type *Ty) { + PointerType *PTy = dyn_cast(Ty); + if (!PTy) + return false; + unsigned AS = PTy->getAddressSpace(); + return AddressSpace::FirstSpecial <= AS && AS <= AddressSpace::LastSpecial; +} + #endif // LLVM_GC_PASSES_H diff --git a/src/llvm-late-gc-lowering-mmtk.cpp b/src/llvm-late-gc-lowering-mmtk.cpp index 5539c8dbcf153..831687b203da3 100644 --- a/src/llvm-late-gc-lowering-mmtk.cpp +++ b/src/llvm-late-gc-lowering-mmtk.cpp @@ -94,3 +94,49 @@ Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F) } return target; } + +void LateLowerGCFrame::CleanupGCPreserve(Function &F, CallInst *CI, Value *callee, Type *T_size) { + if (callee == gc_preserve_begin_func) { + // Initialize an IR builder. 
+ IRBuilder<> builder(CI); + + builder.SetCurrentDebugLocation(CI->getDebugLoc()); + size_t nargs = 0; + State S2(F); + + std::vector args; + for (Use &U : CI->args()) { + Value *V = U; + if (isa(V)) + continue; + if (isa(V->getType())) { + if (isSpecialPtr(V->getType())) { + int Num = Number(S2, V); + if (Num >= 0) { + nargs++; + Value *Val = GetPtrForNumber(S2, Num, CI); + args.push_back(Val); + } + } + } else { + auto Nums = NumberAll(S2, V); + for (int Num : Nums) { + if (Num < 0) + continue; + Value *Val = GetPtrForNumber(S2, Num, CI); + args.push_back(Val); + nargs++; + } + } + } + args.insert(args.begin(), ConstantInt::get(T_size, nargs)); + + ArrayRef args_llvm = ArrayRef(args); + builder.CreateCall(getOrDeclare(jl_well_known::GCPreserveBeginHook), args_llvm ); + } else if (callee == gc_preserve_end_func) { + // Initialize an IR builder. + IRBuilder<> builder(CI); + builder.SetCurrentDebugLocation(CI->getDebugLoc()); + builder.CreateCall(getOrDeclare(jl_well_known::GCPreserveEndHook), {}); + } +} diff --git a/src/llvm-late-gc-lowering-stock.cpp b/src/llvm-late-gc-lowering-stock.cpp index 2a11487773396..838300043768d 100644 --- a/src/llvm-late-gc-lowering-stock.cpp +++ b/src/llvm-late-gc-lowering-stock.cpp @@ -7,3 +7,7 @@ Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F) // Do nothing for the stock GC return target; } + +void LateLowerGCFrame::CleanupGCPreserve(Function &F, CallInst *CI, Value *callee, Type *T_size) { + // Do nothing for the stock GC +} diff --git a/src/llvm-late-gc-lowering.cpp b/src/llvm-late-gc-lowering.cpp index 7d6fba65a79e7..b517c948087f7 100644 --- a/src/llvm-late-gc-lowering.cpp +++ b/src/llvm-late-gc-lowering.cpp @@ -13,14 +13,6 @@ static bool isTrackedValue(Value *V) { return PT && PT->getAddressSpace() == AddressSpace::Tracked; } -static bool isSpecialPtr(Type *Ty) { - PointerType *PTy = dyn_cast(Ty); - if (!PTy) - return false; - unsigned AS = PTy->getAddressSpace(); - return AddressSpace::FirstSpecial 
<= AS && AS <= AddressSpace::LastSpecial; -} - // return how many Special pointers are in T (count > 0), // and if there is anything else in T (all == false) CountTrackedPointers::CountTrackedPointers(Type *T, bool ignore_loaded) { @@ -2006,9 +1998,11 @@ bool LateLowerGCFrame::CleanupIR(Function &F, State *S, bool *CFGModified) { continue; } Value *callee = CI->getCalledOperand(); - if (callee && (callee == gc_flush_func || callee == gc_preserve_begin_func - || callee == gc_preserve_end_func)) { + if (callee && callee == gc_flush_func) { /* No replacement */ + } else if (callee && (callee == gc_preserve_begin_func + || callee == gc_preserve_end_func)) { + CleanupGCPreserve(F, CI, callee, T_size); } else if (pointer_from_objref_func != nullptr && callee == pointer_from_objref_func) { auto *obj = CI->getOperand(0); auto *ASCI = new AddrSpaceCastInst(obj, CI->getType(), "", CI); diff --git a/src/llvm-pass-helpers.cpp b/src/llvm-pass-helpers.cpp index ca25251040fb2..2d54995161908 100644 --- a/src/llvm-pass-helpers.cpp +++ b/src/llvm-pass-helpers.cpp @@ -252,6 +252,8 @@ namespace jl_well_known { static const char *GC_SMALL_ALLOC_NAME = XSTR(jl_gc_small_alloc); static const char *GC_QUEUE_ROOT_NAME = XSTR(jl_gc_queue_root); static const char *GC_ALLOC_TYPED_NAME = XSTR(jl_gc_alloc_typed); + static const char *GC_PRESERVE_BEGIN_HOOK_NAME = XSTR(jl_gc_preserve_begin_hook); + static const char *GC_PRESERVE_END_HOOK_NAME = XSTR(jl_gc_preserve_end_hook); using jl_intrinsics::addGCAllocAttributes; @@ -320,4 +322,50 @@ namespace jl_well_known { allocTypedFunc->addFnAttr(Attribute::getWithAllocSizeArgs(ctx, 1, None)); return addGCAllocAttributes(allocTypedFunc); }); + + const WellKnownFunctionDescription GCPreserveBeginHook( + GC_PRESERVE_BEGIN_HOOK_NAME, + [](Type *T_size) { + auto &ctx = T_size->getContext(); + auto func = Function::Create( + FunctionType::get( + Type::getVoidTy(ctx), + { T_size }, + true), + Function::ExternalLinkage, + GC_PRESERVE_BEGIN_HOOK_NAME); + +#if 
JL_LLVM_VERSION >= 160000 + func->setMemoryEffects(MemoryEffects::inaccessibleOrArgMemOnly()); +#else + func->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); +#endif + return func; + }); + + const WellKnownFunctionDescription GCPreserveEndHook( + GC_PRESERVE_END_HOOK_NAME, + [](Type *T_size) { + auto &ctx = T_size->getContext(); + auto func = Function::Create( + FunctionType::get( + Type::getVoidTy(ctx), + { }, + false), + Function::ExternalLinkage, + GC_PRESERVE_END_HOOK_NAME); +#if JL_LLVM_VERSION >= 160000 + func->setMemoryEffects(MemoryEffects::inaccessibleOrArgMemOnly()); +#else + func->addFnAttr(Attribute::InaccessibleMemOrArgMemOnly); +#endif + return func; + }); +} + +void setName(llvm::Value *V, const llvm::Twine &Name, int debug_info) +{ + if (debug_info >= 2 && !llvm::isa(V)) { + V->setName(Name); + } } diff --git a/src/llvm-pass-helpers.h b/src/llvm-pass-helpers.h index d46f1f46634e6..b02c53b9797fa 100644 --- a/src/llvm-pass-helpers.h +++ b/src/llvm-pass-helpers.h @@ -155,6 +155,12 @@ namespace jl_well_known { // `jl_gc_alloc_typed`: allocates bytes. extern const WellKnownFunctionDescription GCAllocTyped; + + // `jl_gc_preserve_begin_hook`: called at the beginning of gc preserve regions, if required + extern const WellKnownFunctionDescription GCPreserveBeginHook; + + // `jl_gc_preserve_end_hook`: called at the end of gc preserve regions, if required + extern const WellKnownFunctionDescription GCPreserveEndHook; } void setName(llvm::Value *V, const llvm::Twine &Name, int debug_info); From d922f461ac3778c32121cae50e1559daaa61adda Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Wed, 4 Dec 2024 12:43:18 +1300 Subject: [PATCH 04/11] Support VO bit (#76) This PR ports https://github.com/mmtk/julia/pull/59 to `dev`. In addition, this PR 1. introduces `jl_gc_permsymbol` for allocating the special perm object in `mk_symbol`, 2. removes some seemingly unnecessary post alloc calls for `jl_gc_perm_alloc` in `datatype.c`, and 3. 
does not support set VO bit using the slowpath (MMTk call). --- src/gc-interface.h | 8 ++++++++ src/gc-mmtk.c | 44 ++++++++++++++++++++++++++++++++------------ src/gc-stock.c | 9 +++++++++ src/symbol.c | 7 ++----- 4 files changed, 51 insertions(+), 17 deletions(-) diff --git a/src/gc-interface.h b/src/gc-interface.h index 3e5c8d64dda20..03a3d26ee665a 100644 --- a/src/gc-interface.h +++ b/src/gc-interface.h @@ -214,6 +214,14 @@ JL_DLLEXPORT void *jl_gc_perm_alloc(size_t sz, int zero, unsigned align, // the allocated object. All objects stored in fields of this object // must be either permanently allocated or have other roots. struct _jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT; +// permanently allocates a symbol (jl_sym_t). The object needs to be word aligned, +// and tagged with jl_sym_tag. +// FIXME: Ideally we should merge this with jl_gc_permobj, as symbol is an object. +// Currently there are a few differences between the two functions, and refactoring is needed. +// 1. sz for this function includes the object header, and sz for jl_gc_permobj excludes the header size. +// 2. align for this function is word align, and align for jl_gc_permobj depends on the allocation size. +// 3. ty for this function is jl_symbol_tag << 4, and ty for jl_gc_permobj is a datatype pointer. +struct _jl_value_t *jl_gc_permsymbol(size_t sz) JL_NOTSAFEPOINT; // This function notifies the GC about memory addresses that are set when loading the boot image. // The GC may use that information to, for instance, determine that such objects should // be treated as marked and belonged to the old generation in nursery collections. 
diff --git a/src/gc-mmtk.c b/src/gc-mmtk.c index da1b56e107403..fe4bc272a7c89 100644 --- a/src/gc-mmtk.c +++ b/src/gc-mmtk.c @@ -842,18 +842,28 @@ STATIC_INLINE void* bump_alloc_fast(MMTkMutatorContext* mutator, uintptr_t* curs } } -STATIC_INLINE void* mmtk_immix_alloc_fast(MMTkMutatorContext* mutator, size_t size, size_t align, size_t offset) { - ImmixAllocator* allocator = &mutator->allocators.immix[MMTK_DEFAULT_IMMIX_ALLOCATOR]; - return bump_alloc_fast(mutator, (uintptr_t*)&allocator->cursor, (intptr_t)allocator->limit, size, align, offset, 0); +inline void mmtk_set_side_metadata(const void* side_metadata_base, void* obj) { + intptr_t addr = (intptr_t) obj; + uint8_t* meta_addr = (uint8_t*) side_metadata_base + (addr >> 6); + intptr_t shift = (addr >> 3) & 0b111; + while(1) { + uint8_t old_val = *meta_addr; + uint8_t new_val = old_val | (1 << shift); + if (jl_atomic_cmpswap((_Atomic(uint8_t)*)meta_addr, &old_val, new_val)) { + break; + } + } } -inline void mmtk_immix_post_alloc_slow(MMTkMutatorContext* mutator, void* obj, size_t size) { - mmtk_post_alloc(mutator, obj, size, 0); +inline void* mmtk_immix_alloc_fast(MMTkMutatorContext* mutator, size_t size, size_t align, size_t offset) { + ImmixAllocator* allocator = &mutator->allocators.immix[MMTK_DEFAULT_IMMIX_ALLOCATOR]; + return bump_alloc_fast(mutator, (uintptr_t*)&allocator->cursor, (intptr_t)allocator->limit, size, align, offset, 0); } -STATIC_INLINE void mmtk_immix_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) { - // FIXME: for now, we do nothing - // but when supporting moving, this is where we set the valid object (VO) bit +inline void mmtk_immix_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) { + if (MMTK_NEEDS_VO_BIT) { + mmtk_set_side_metadata(MMTK_SIDE_VO_BIT_BASE_ADDRESS, obj); + } } STATIC_INLINE void* mmtk_immortal_alloc_fast(MMTkMutatorContext* mutator, size_t size, size_t align, size_t offset) { @@ -861,10 +871,10 @@ STATIC_INLINE void* 
mmtk_immortal_alloc_fast(MMTkMutatorContext* mutator, size_t return bump_alloc_fast(mutator, (uintptr_t*)&allocator->cursor, (uintptr_t)allocator->limit, size, align, offset, 1); } -STATIC_INLINE void mmtk_immortal_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) { - // FIXME: Similarly, for now, we do nothing - // but when supporting moving, this is where we set the valid object (VO) bit - // and log (old gen) bit +inline void mmtk_immortal_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) { + if (MMTK_NEEDS_VO_BIT) { + mmtk_set_side_metadata(MMTK_SIDE_VO_BIT_BASE_ADDRESS, obj); + } } JL_DLLEXPORT jl_value_t *jl_mmtk_gc_alloc_default(jl_ptls_t ptls, int osize, size_t align, void *ty) @@ -1042,6 +1052,16 @@ jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT return jl_valueof(o); } +jl_value_t *jl_gc_permsymbol(size_t sz) JL_NOTSAFEPOINT +{ + jl_taggedvalue_t *tag = (jl_taggedvalue_t*)jl_gc_perm_alloc(sz, 0, sizeof(void*), 0); + jl_value_t *sym = jl_valueof(tag); + jl_ptls_t ptls = jl_current_task->ptls; + jl_set_typetagof(sym, jl_symbol_tag, 0); // We need to set the symbol tag. The GC tag doesn't matter. + mmtk_immortal_post_alloc_fast(&ptls->gc_tls.mmtk_mutator, sym, sz); + return sym; +} + JL_DLLEXPORT void *jl_gc_managed_malloc(size_t sz) { jl_ptls_t ptls = jl_current_task->ptls; diff --git a/src/gc-stock.c b/src/gc-stock.c index 6d8835ec32471..e7362745ec17c 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -3940,6 +3940,15 @@ jl_value_t *jl_gc_permobj(size_t sz, void *ty) JL_NOTSAFEPOINT return jl_valueof(o); } +jl_value_t *jl_gc_permsymbol(size_t sz) JL_NOTSAFEPOINT +{ + jl_taggedvalue_t *tag = (jl_taggedvalue_t*)jl_gc_perm_alloc(sz, 0, sizeof(void*), 0); + jl_value_t *sym = jl_valueof(tag); + // set to old marked so that we won't look at it in the GC or write barrier. 
+ jl_set_typetagof(sym, jl_symbol_tag, GC_OLD_MARKED); + return sym; +} + JL_DLLEXPORT int jl_gc_enable_conservative_gc_support(void) { if (jl_is_initialized()) { diff --git a/src/symbol.c b/src/symbol.c index ef2c11e0842e8..cf53743b60602 100644 --- a/src/symbol.c +++ b/src/symbol.c @@ -10,6 +10,7 @@ #include "julia.h" #include "julia_internal.h" #include "julia_assert.h" +#include "gc-interface.h" #ifdef __cplusplus extern "C" { @@ -34,12 +35,8 @@ static size_t symbol_nbytes(size_t len) JL_NOTSAFEPOINT static jl_sym_t *mk_symbol(const char *str, size_t len) JL_NOTSAFEPOINT { - jl_sym_t *sym; size_t nb = symbol_nbytes(len); - jl_taggedvalue_t *tag = (jl_taggedvalue_t*)jl_gc_perm_alloc(nb, 0, sizeof(void*), 0); - sym = (jl_sym_t*)jl_valueof(tag); - // set to old marked so that we won't look at it in the GC or write barrier. - jl_set_typetagof(sym, jl_symbol_tag, GC_OLD_MARKED); + jl_sym_t *sym = (jl_sym_t*)jl_gc_permsymbol(nb); jl_atomic_store_relaxed(&sym->left, NULL); jl_atomic_store_relaxed(&sym->right, NULL); sym->hash = hash_symbol(str, len); From 8fa5342f584acdfa431c16ed8cd367b291338f20 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Thu, 6 Feb 2025 02:45:29 +0000 Subject: [PATCH 05/11] Add jl_gc_notify_image_alloc. Add back VO bit generation in code gen. Fix a few build issues. --- src/gc-interface.h | 4 ++++ src/gc-mmtk.c | 25 +++++++++---------------- src/gc-stock.c | 8 ++++---- src/julia.h | 4 ++++ src/llvm-late-gc-lowering-mmtk.cpp | 26 ++++++++++++++++++++++++++ src/staticdata.c | 5 ++++- 6 files changed, 51 insertions(+), 21 deletions(-) diff --git a/src/gc-interface.h b/src/gc-interface.h index 03a3d26ee665a..5224fa8329e97 100644 --- a/src/gc-interface.h +++ b/src/gc-interface.h @@ -226,6 +226,10 @@ struct _jl_value_t *jl_gc_permsymbol(size_t sz) JL_NOTSAFEPOINT; // The GC may use that information to, for instance, determine that such objects should // be treated as marked and belonged to the old generation in nursery collections. 
void jl_gc_notify_image_load(const char* img_data, size_t len); +// This function notifies the GC about memory addresses that are set when allocating the boot image. +// The GC may use that information to, for instance, determine that all objects in that chunk of memory should +// be treated as marked and belonged to the old generation in nursery collections. +void jl_gc_notify_image_alloc(const char* img_data, size_t len); // ========================================================================= // // Runtime Write-Barriers diff --git a/src/gc-mmtk.c b/src/gc-mmtk.c index fe4bc272a7c89..0b1556b2ef976 100644 --- a/src/gc-mmtk.c +++ b/src/gc-mmtk.c @@ -40,19 +40,7 @@ static const size_t default_collect_interval = 3200 * 1024 * sizeof(void*); static memsize_t max_total_memory = (memsize_t) MAX32HEAP; #endif -// ========================================================================= // -// Defined by the binding -// ========================================================================= // -extern void mmtk_julia_copy_stack_check(int copy_stack); -extern void mmtk_gc_init(uintptr_t min_heap_size, uintptr_t max_heap_size, uintptr_t n_gcthreads, uintptr_t header_size, uintptr_t tag); -extern void mmtk_object_reference_write_post(void* mutator, const void* parent, const void* ptr); -extern void mmtk_object_reference_write_slow(void* mutator, const void* parent, const void* ptr); -extern void* mmtk_alloc(void* mutator, size_t size, size_t align, size_t offset, int allocator); -extern void mmtk_post_alloc(void* mutator, void* refer, size_t bytes, int allocator); -extern void mmtk_store_obj_size_c(void* obj, size_t size); -extern const void* MMTK_SIDE_LOG_BIT_BASE_ADDRESS; -extern const void* MMTK_SIDE_VO_BIT_BASE_ADDRESS; // ========================================================================= // // GC Initialization and Control @@ -842,7 +830,7 @@ STATIC_INLINE void* bump_alloc_fast(MMTkMutatorContext* mutator, uintptr_t* curs } } -inline void 
mmtk_set_side_metadata(const void* side_metadata_base, void* obj) { +STATIC_INLINE void mmtk_set_side_metadata(const void* side_metadata_base, void* obj) { intptr_t addr = (intptr_t) obj; uint8_t* meta_addr = (uint8_t*) side_metadata_base + (addr >> 6); intptr_t shift = (addr >> 3) & 0b111; @@ -855,12 +843,12 @@ inline void mmtk_set_side_metadata(const void* side_metadata_base, void* obj) { } } -inline void* mmtk_immix_alloc_fast(MMTkMutatorContext* mutator, size_t size, size_t align, size_t offset) { +STATIC_INLINE void* mmtk_immix_alloc_fast(MMTkMutatorContext* mutator, size_t size, size_t align, size_t offset) { ImmixAllocator* allocator = &mutator->allocators.immix[MMTK_DEFAULT_IMMIX_ALLOCATOR]; return bump_alloc_fast(mutator, (uintptr_t*)&allocator->cursor, (intptr_t)allocator->limit, size, align, offset, 0); } -inline void mmtk_immix_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) { +STATIC_INLINE void mmtk_immix_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) { if (MMTK_NEEDS_VO_BIT) { mmtk_set_side_metadata(MMTK_SIDE_VO_BIT_BASE_ADDRESS, obj); } @@ -871,7 +859,7 @@ STATIC_INLINE void* mmtk_immortal_alloc_fast(MMTkMutatorContext* mutator, size_t return bump_alloc_fast(mutator, (uintptr_t*)&allocator->cursor, (uintptr_t)allocator->limit, size, align, offset, 1); } -inline void mmtk_immortal_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) { +STATIC_INLINE void mmtk_immortal_post_alloc_fast(MMTkMutatorContext* mutator, void* obj, size_t size) { if (MMTK_NEEDS_VO_BIT) { mmtk_set_side_metadata(MMTK_SIDE_VO_BIT_BASE_ADDRESS, obj); } @@ -1099,6 +1087,11 @@ void jl_gc_notify_image_load(const char* img_data, size_t len) mmtk_set_vm_space((void*)img_data, len); } +void jl_gc_notify_image_alloc(const char* img_data, size_t len) +{ + mmtk_immortal_region_post_alloc((void*)img_data, len); +} + // ========================================================================= // // Code specific to stock that is not 
supported by MMTk // ========================================================================= // diff --git a/src/gc-stock.c b/src/gc-stock.c index e7362745ec17c..09ec9911d52d7 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -4085,15 +4085,15 @@ void jl_gc_notify_image_load(const char* img_data, size_t len) // Do nothing } -JL_DLLEXPORT const char* jl_gc_active_impl(void) { - return "Built with stock GC"; -} - void jl_gc_notify_image_alloc(const char* img_data, size_t len) { // Do nothing } +JL_DLLEXPORT const char* jl_gc_active_impl(void) { + return "Built with stock GC"; +} + JL_DLLEXPORT void jl_gc_preserve_begin_hook(int n, ...) JL_NOTSAFEPOINT { jl_unreachable(); diff --git a/src/julia.h b/src/julia.h index 9516475ea0521..660ec02ef8e99 100644 --- a/src/julia.h +++ b/src/julia.h @@ -1056,8 +1056,12 @@ struct _jl_gcframe_t { #define JL_GC_ENCODE_PUSHARGS_NO_TPIN(n) JL_GC_ENCODE_PUSHARGS(n) #define JL_GC_ENCODE_PUSH_NO_TPIN(n) JL_GC_ENCODE_PUSH(n) + #else +// VO bit is required to support conservative stack scanning and moving. +#define MMTK_NEEDS_VO_BIT (1) + // We use an extra bit (100) in the nroots value from the frame to indicate that the roots // in the frame are/are not transitively pinning. // There are currently 3 macros that encode passing nroots to the gcframe diff --git a/src/llvm-late-gc-lowering-mmtk.cpp b/src/llvm-late-gc-lowering-mmtk.cpp index 831687b203da3..e3f83be1f9381 100644 --- a/src/llvm-late-gc-lowering-mmtk.cpp +++ b/src/llvm-late-gc-lowering-mmtk.cpp @@ -1,6 +1,7 @@ // This file is a part of Julia. 
License is MIT: https://julialang.org/license #include "llvm-gc-interface-passes.h" +#include "mmtk.h" Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F) { @@ -83,6 +84,31 @@ Value* LateLowerGCFrame::lowerGCAllocBytesLate(CallInst *target, Function &F) auto v_raw = builder.CreateNSWAdd(result, ConstantInt::get(Type::getInt64Ty(target->getContext()), sizeof(jl_taggedvalue_t))); auto v_as_ptr = builder.CreateIntToPtr(v_raw, smallAllocFunc->getReturnType()); + + // Post alloc + if (MMTK_NEEDS_VO_BIT) { + auto intptr_ty = Type::getInt64Ty(target->getContext()); + auto i8_ty = Type::getInt8Ty(F.getContext()); + intptr_t metadata_base_address = reinterpret_cast(MMTK_SIDE_VO_BIT_BASE_ADDRESS); + auto metadata_base_val = ConstantInt::get(intptr_ty, metadata_base_address); + auto metadata_base_ptr = ConstantExpr::getIntToPtr(metadata_base_val, PointerType::get(i8_ty, 0)); + // intptr_t addr = (intptr_t) v; + auto addr = v_raw; + // uint8_t* vo_meta_addr = (uint8_t*) (MMTK_SIDE_VO_BIT_BASE_ADDRESS) + (addr >> 6); + auto shr = builder.CreateLShr(addr, ConstantInt::get(intptr_ty, 6)); + auto metadata_ptr = builder.CreateGEP(i8_ty, metadata_base_ptr, shr); + // intptr_t shift = (addr >> 3) & 0b111; + auto shift = builder.CreateAnd(builder.CreateLShr(addr, ConstantInt::get(intptr_ty, 3)), ConstantInt::get(intptr_ty, 7)); + // uint8_t byte_val = *vo_meta_addr; + auto byte_val = builder.CreateAlignedLoad(i8_ty, metadata_ptr, Align()); + // uint8_t new_val = byte_val | (1 << shift); + auto shifted_val = builder.CreateShl(ConstantInt::get(intptr_ty, 1), shift); + auto shifted_val_i8 = builder.CreateTruncOrBitCast(shifted_val, i8_ty); + auto new_val = builder.CreateOr(byte_val, shifted_val_i8); + // (*vo_meta_addr) = new_val; + builder.CreateStore(new_val, metadata_ptr); + } + builder.CreateBr(next_instr->getParent()); phiNode->addIncoming(new_call, slowpath); diff --git a/src/staticdata.c b/src/staticdata.c index cb1dc54d26d50..26cf130bf5ce2 100644 --- 
a/src/staticdata.c +++ b/src/staticdata.c @@ -4083,8 +4083,10 @@ static jl_value_t *jl_restore_package_image_from_stream(void* pkgimage_handle, i char *sysimg; int success = !needs_permalloc; ios_seek(f, datastartpos); - if (needs_permalloc) + if (needs_permalloc) { sysimg = (char*)jl_gc_perm_alloc(len, 0, 64, 0); + jl_gc_notify_image_alloc(sysimg, len); + } else sysimg = &f->buf[f->bpos]; if (needs_permalloc) @@ -4208,6 +4210,7 @@ JL_DLLEXPORT void jl_restore_system_image(const char *fname) ios_seek_end(&f); size_t len = ios_pos(&f); char *sysimg = (char*)jl_gc_perm_alloc(len, 0, 64, 0); + jl_gc_notify_image_alloc(sysimg, len); ios_seek(&f, 0); if (ios_readall(&f, sysimg, len) != len) jl_errorf("Error reading system image file."); From cb40d89401160394774808b0e348d7e80329374e Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Fri, 6 Dec 2024 13:09:26 +1300 Subject: [PATCH 06/11] Save thread context before yielding for GC (#78) This PR ports https://github.com/mmtk/mmtk-julia/pull/159 to `dev`. The difference is that this PR adds a general call to the GC interface `jl_gc_notify_thread_yield`. In this case, each GC will do what they need in the call, and the context is saved in the GC specific TLS. --- src/gc-interface.h | 3 +++ src/gc-mmtk.c | 15 +++++++++++++++ src/gc-stock.c | 9 +++++++++ src/gc-tls-mmtk.h | 1 + src/signals-unix.c | 2 ++ 5 files changed, 30 insertions(+) diff --git a/src/gc-interface.h b/src/gc-interface.h index 5224fa8329e97..ebdf26fcdb73d 100644 --- a/src/gc-interface.h +++ b/src/gc-interface.h @@ -108,6 +108,9 @@ JL_DLLEXPORT const char* jl_gc_active_impl(void); // each GC should implement it but it will most likely not be used by other code in the runtime. // It still needs to be annotated with JL_DLLEXPORT since it is called from Rust by MMTk. JL_DLLEXPORT void jl_gc_sweep_stack_pools_and_mtarraylist_buffers(jl_ptls_t ptls) JL_NOTSAFEPOINT; +// Notifies the GC that the given thread is about to yield for a GC. 
ctx is the ucontext for the thread +// if it is already fetched by the caller, otherwise it is NULL. +JL_DLLEXPORT void jl_gc_notify_thread_yield(jl_ptls_t ptls, void* ctx); // TODO: The preserve hook functions may be temporary. We should see the performance impact of the change. diff --git a/src/gc-mmtk.c b/src/gc-mmtk.c index 0b1556b2ef976..f4353003943e9 100644 --- a/src/gc-mmtk.c +++ b/src/gc-mmtk.c @@ -282,6 +282,8 @@ JL_DLLEXPORT void jl_gc_prepare_to_collect(void) gc_num.total_time_to_safepoint += duration; if (!jl_atomic_load_acquire(&jl_gc_disable_counter)) { + // This thread will yield. + jl_gc_notify_thread_yield(ptls, NULL); JL_LOCK_NOGC(&finalizers_lock); // all the other threads are stopped, so this does not make sense, right? otherwise, failing that, this seems like plausibly a deadlock #ifndef __clang_gcanalyzer__ mmtk_block_thread_for_gc(); @@ -311,6 +313,19 @@ JL_DLLEXPORT void jl_gc_prepare_to_collect(void) errno = last_errno; } +JL_DLLEXPORT void jl_gc_notify_thread_yield(jl_ptls_t ptls, void* ctx) { + if (ctx == NULL) { + // Save the context for the thread as it was running at the time of the call + int r = getcontext(&ptls->gc_tls.ctx_at_the_time_gc_started); + if (r == -1) { + jl_safe_printf("Failed to save context for conservative scanning\n"); + abort(); + } + return; + } + memcpy(&ptls->gc_tls.ctx_at_the_time_gc_started, ctx, sizeof(ucontext_t)); +} + // ========================================================================= // // GC Statistics // ========================================================================= // diff --git a/src/gc-stock.c b/src/gc-stock.c index 09ec9911d52d7..6e4f14e5412f2 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -3458,6 +3458,11 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection) gc_cblist_pre_gc, (collection)); if (!jl_atomic_load_acquire(&jl_gc_disable_counter)) { + // This thread will yield. 
+ // jl_gc_notify_thread_yield does nothing for the stock GC at the point, but it may be non empty in the future, + // and this is a place where we should call jl_gc_notify_thread_yield. + // TODO: This call can be removed if requested. + jl_gc_notify_thread_yield(ptls, NULL); JL_LOCK_NOGC(&finalizers_lock); // all the other threads are stopped, so this does not make sense, right? otherwise, failing that, this seems like plausibly a deadlock #ifndef __clang_gcanalyzer__ if (_jl_gc_collect(ptls, collection)) { @@ -4104,6 +4109,10 @@ JL_DLLEXPORT void jl_gc_preserve_end_hook(void) JL_NOTSAFEPOINT jl_unreachable(); } +JL_DLLEXPORT void jl_gc_notify_thread_yield(jl_ptls_t ptls, void* ctx) { + // Do nothing before a thread yields +} + #ifdef __cplusplus } #endif diff --git a/src/gc-tls-mmtk.h b/src/gc-tls-mmtk.h index 5b69aef5d55fb..3a7f88980589d 100644 --- a/src/gc-tls-mmtk.h +++ b/src/gc-tls-mmtk.h @@ -14,6 +14,7 @@ extern "C" { typedef struct { MMTkMutatorContext mmtk_mutator; _Atomic(size_t) malloc_sz_since_last_poll; + ucontext_t ctx_at_the_time_gc_started; } jl_gc_tls_states_t; #ifdef __cplusplus diff --git a/src/signals-unix.c b/src/signals-unix.c index 1f4ad647a87af..fa3ad6a09b9eb 100644 --- a/src/signals-unix.c +++ b/src/signals-unix.c @@ -410,6 +410,8 @@ JL_NO_ASAN static void segv_handler(int sig, siginfo_t *info, void *context) return; } if (sig == SIGSEGV && info->si_code == SEGV_ACCERR && jl_addr_is_safepoint((uintptr_t)info->si_addr) && !is_write_fault(context)) { + // TODO: We should do the same for other platforms + jl_gc_notify_thread_yield(ct->ptls, context); jl_set_gc_and_wait(ct); // Do not raise sigint on worker thread if (jl_atomic_load_relaxed(&ct->tid) != 0) From 217fe9aaa7d6e4c8a6cf7b39071a42439b68eeee Mon Sep 17 00:00:00 2001 From: Eduardo Souza Date: Wed, 18 Dec 2024 16:40:04 +1100 Subject: [PATCH 07/11] Pin pointer literals in literal_pointer_val and Julia pointers in julia_to_scm (#80) Backporting https://github.com/mmtk/julia/pull/63 to 
`dev`. Co-authored-by: Yi Lin --- src/ast.c | 1 + src/cgutils.cpp | 1 + src/jitlayers.h | 1 + 3 files changed, 3 insertions(+) diff --git a/src/ast.c b/src/ast.c index 0f24d96393f2f..d8af42757ff72 100644 --- a/src/ast.c +++ b/src/ast.c @@ -780,6 +780,7 @@ static value_t julia_to_list2_noalloc(fl_context_t *fl_ctx, jl_value_t *a, jl_va static value_t julia_to_scm_(fl_context_t *fl_ctx, jl_value_t *v, int check_valid) { + PTR_PIN(v); value_t retval; if (julia_to_scm_noalloc1(fl_ctx, v, &retval)) return retval; diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 98c5627578b80..cf8a32e74effa 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -570,6 +570,7 @@ static Value *literal_pointer_val(jl_codectx_t &ctx, jl_value_t *p) { if (p == NULL) return Constant::getNullValue(ctx.types().T_pjlvalue); + PTR_PIN(p); Value *pgv = literal_pointer_val_slot(ctx, p); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); auto load = ai.decorateInst(maybe_mark_load_dereferenceable( diff --git a/src/jitlayers.h b/src/jitlayers.h index 4637670ec588c..568ad4f0bccd0 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -310,6 +310,7 @@ void add_named_global(StringRef name, void *addr) JL_NOTSAFEPOINT; static inline Constant *literal_static_pointer_val(const void *p, Type *T) JL_NOTSAFEPOINT { + PTR_PIN((void*)p); // this function will emit a static pointer into the generated code // the generated code will only be valid during the current session, // and thus, this should typically be avoided in new API's From 39a9bc42abc095d2cada3e5fdb668a5a750cfa66 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Wed, 5 Feb 2025 19:04:07 +1300 Subject: [PATCH 08/11] Differentiate OBJ_PIN and PTR_PIN. Add more pinning. Trace all global roots. (#84) This PR * differentiates `OBJ_PIN` from `PTR_PIN`. In rare cases, we have to deal with internal pointers, and have to use `PTR_PIN`. * pins more objects that cannot be moved. * traces all the `JL_GLOBALLY_ROOTED` symbols. 
This PR still transitively pins `jl_global_roots_list`. Without transitive pinning, we observed assertion failures in the precompilation step during the Julia build, reporting that we reached objects without the valid object bit. This issue will be debugged and fixed after this PR. --- src/aotcompile.cpp | 1 + src/ast.c | 4 +- src/builtins.c | 11 ++ src/cgutils.cpp | 4 +- src/codegen.cpp | 10 ++ src/datatype.c | 6 + src/gc-interface.h | 4 + src/gc-mmtk.c | 295 +++++++++++++++++++++++++++++++++++++++-- src/gc-stock.c | 10 ++ src/genericmemory.c | 5 +- src/gf.c | 2 + src/ircode.c | 4 +- src/jitlayers.h | 3 +- src/jl_uv.c | 2 + src/julia.h | 13 ++ src/method.c | 2 + src/module.c | 1 + src/runtime_ccall.cpp | 2 + src/staticdata.c | 8 +- src/staticdata_utils.c | 3 +- src/task.c | 12 +- src/toplevel.c | 1 + 22 files changed, 381 insertions(+), 22 deletions(-) diff --git a/src/aotcompile.cpp b/src/aotcompile.cpp index 5524518da46fa..f42fc1d588c71 100644 --- a/src/aotcompile.cpp +++ b/src/aotcompile.cpp @@ -72,6 +72,7 @@ typedef struct { SmallVector jl_sysimg_fvars; SmallVector jl_sysimg_gvars; std::map> jl_fvar_map; + // This holds references to the heap. Need to be pinned. SmallVector jl_value_to_llvm; SmallVector jl_external_to_llvm; } jl_native_code_desc_t; diff --git a/src/ast.c b/src/ast.c index d8af42757ff72..1e26e4fadf048 100644 --- a/src/ast.c +++ b/src/ast.c @@ -780,7 +780,9 @@ static value_t julia_to_list2_noalloc(fl_context_t *fl_ctx, jl_value_t *a, jl_va static value_t julia_to_scm_(fl_context_t *fl_ctx, jl_value_t *v, int check_valid) { - PTR_PIN(v); + // The following code will take internal pointers to v's fields. We need to make sure + // that v will not be moved by GC. 
+ OBJ_PIN(v); value_t retval; if (julia_to_scm_noalloc1(fl_ctx, v, &retval)) return retval; diff --git a/src/builtins.c b/src/builtins.c index f67ef65d35356..8ac9112e2d69a 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -344,6 +344,9 @@ static uintptr_t type_object_id_(jl_value_t *v, jl_varidx_t *env) JL_NOTSAFEPOIN i++; pe = pe->prev; } + // FIXME: Pinning objects that get hashed + // until we implement address space hashing. + OBJ_PIN(v); uintptr_t bits = jl_astaggedvalue(v)->header; if (bits & GC_IN_IMAGE) return ((uintptr_t*)v)[-2]; @@ -400,6 +403,10 @@ static uintptr_t immut_id_(jl_datatype_t *dt, jl_value_t *v, uintptr_t h) JL_NOT // a few select pointers (notably symbol) also have special hash values // which may affect the stability of the objectid hash, even though // they don't affect egal comparison + + // FIXME: Pinning objects that get hashed + // until we implement address space hashing. + PTR_PIN(v); // This has to be a pointer pin -- v could be an internal pointer return bits_hash(v, sz) ^ h; } if (dt == jl_unionall_type) @@ -460,6 +467,10 @@ static uintptr_t NOINLINE jl_object_id__cold(uintptr_t tv, jl_value_t *v) JL_NOT uintptr_t bits = jl_astaggedvalue(v)->header; if (bits & GC_IN_IMAGE) return ((uintptr_t*)v)[-2]; + + // FIXME: Pinning objects that get hashed + // until we implement address space hashing. + OBJ_PIN(v); return inthash((uintptr_t)v); } return immut_id_(dt, v, dt->hash); diff --git a/src/cgutils.cpp b/src/cgutils.cpp index cf8a32e74effa..f2899d8747e0c 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -398,6 +398,7 @@ static Constant *julia_pgv(jl_codectx_t &ctx, const char *cname, void *addr) // emit a GlobalVariable for a jl_value_t named "cname" // store the name given so we can reuse it (facilitating merging later) // so first see if there already is a GlobalVariable for this address + OBJ_PIN(addr); // This will be stored in the native heap. We need to pin it. 
GlobalVariable* &gv = ctx.emission_context.global_targets[addr]; Module *M = jl_Module; StringRef localname; @@ -570,7 +571,8 @@ static Value *literal_pointer_val(jl_codectx_t &ctx, jl_value_t *p) { if (p == NULL) return Constant::getNullValue(ctx.types().T_pjlvalue); - PTR_PIN(p); + // Pointers to p will be emitted into the code. Make sure p won't be moved by GC. + OBJ_PIN(p); Value *pgv = literal_pointer_val_slot(ctx, p); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_const); auto load = ai.decorateInst(maybe_mark_load_dereferenceable( diff --git a/src/codegen.cpp b/src/codegen.cpp index e9e4275672c7e..bc8da5e67e67e 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -1823,6 +1823,7 @@ struct jl_cgval_t { promotion_point(nullptr), promotion_ssa(-1) { + OBJ_PIN(typ); // jl_cgval_t could be in the native heap. We have to pin the object references in it. assert(TIndex == nullptr || TIndex->getType() == getInt8Ty(TIndex->getContext())); } jl_cgval_t(Value *Vptr, bool isboxed, jl_value_t *typ, Value *tindex, MDNode *tbaa, Value* inline_roots) = delete; @@ -1839,6 +1840,7 @@ struct jl_cgval_t { promotion_point(nullptr), promotion_ssa(-1) { + OBJ_PIN(typ); // jl_cgval_t could be in the native heap. We have to pin the object references in it. if (Vboxed) assert(Vboxed->getType() == JuliaType::get_prjlvalue_ty(Vboxed->getContext())); assert(tbaa != nullptr); @@ -1859,6 +1861,8 @@ struct jl_cgval_t { promotion_point(nullptr), promotion_ssa(-1) { + OBJ_PIN(typ); // jl_cgval_t could be in the native heap. We have to pin the object references in it. + OBJ_PIN(constant); // jl_cgval_t could be in the native heap. We have to pin the object references in it. assert(jl_is_datatype(typ)); assert(constant); } @@ -1875,6 +1879,8 @@ struct jl_cgval_t { promotion_point(v.promotion_point), promotion_ssa(v.promotion_ssa) { + OBJ_PIN(typ); // jl_cgval_t could be in the native heap. We have to pin the object references in it. 
+ OBJ_PIN(constant); // jl_cgval_t could be in the native heap. We have to pin the object references in it. if (Vboxed) assert(Vboxed->getType() == JuliaType::get_prjlvalue_ty(Vboxed->getContext())); // this constructor expects we had a badly or equivalently typed version @@ -1947,6 +1953,7 @@ class jl_codectx_t { std::map phic_slots; std::map > scope_restore; SmallVector SAvalues; + // The vector holds reference to Julia obj ref. We need to pin jl_value_t*. SmallVector, jl_value_t *>, 0> PhiNodes; SmallVector ssavalue_assigned; SmallVector ssavalue_usecount; @@ -6254,6 +6261,7 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r) decay_derived(ctx, phi)); jl_cgval_t val = mark_julia_slot(ptr, phiType, Tindex_phi, best_tbaa(ctx.tbaa(), phiType)); val.Vboxed = ptr_phi; + OBJ_PIN(r); // r will be saved to a data structure in the native heap, make sure it won't be moved by GC. ctx.PhiNodes.push_back(std::make_tuple(val, BB, dest, ptr_phi, roots, r)); ctx.SAvalues[idx] = val; ctx.ssavalue_assigned[idx] = true; @@ -6263,6 +6271,7 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r) PHINode *Tindex_phi = PHINode::Create(getInt8Ty(ctx.builder.getContext()), jl_array_nrows(edges), "tindex_phi"); Tindex_phi->insertInto(BB, InsertPt); jl_cgval_t val = mark_julia_slot(NULL, phiType, Tindex_phi, ctx.tbaa().tbaa_stack); + OBJ_PIN(r); // r will be saved to a data structure in the native heap, make sure it won't be moved by GC. ctx.PhiNodes.push_back(std::make_tuple(val, BB, dest, (PHINode*)nullptr, roots, r)); ctx.SAvalues[idx] = val; ctx.ssavalue_assigned[idx] = true; @@ -6313,6 +6322,7 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r) value_phi->insertInto(BB, InsertPt); slot = mark_julia_type(ctx, value_phi, isboxed, phiType); } + OBJ_PIN(r); // r will be saved to a data structure in the native heap, make sure it won't be moved by GC. 
ctx.PhiNodes.push_back(std::make_tuple(slot, BB, dest, value_phi, roots, r)); ctx.SAvalues[idx] = slot; ctx.ssavalue_assigned[idx] = true; diff --git a/src/datatype.c b/src/datatype.c index fd25cca503676..641591ea69a98 100644 --- a/src/datatype.c +++ b/src/datatype.c @@ -64,6 +64,9 @@ JL_DLLEXPORT jl_typename_t *jl_new_typename_in(jl_sym_t *name, jl_module_t *modu jl_typename_t *tn = (jl_typename_t*)jl_gc_alloc(ct->ptls, sizeof(jl_typename_t), jl_typename_type); + // Typenames should be pinned since they are used as metadata, and are + // read during scan_object + OBJ_PIN(tn); tn->name = name; tn->module = module; tn->wrapper = NULL; @@ -96,6 +99,9 @@ jl_datatype_t *jl_new_uninitialized_datatype(void) { jl_task_t *ct = jl_current_task; jl_datatype_t *t = (jl_datatype_t*)jl_gc_alloc(ct->ptls, sizeof(jl_datatype_t), jl_datatype_type); + // Types should be pinned since they are used as metadata, and are + // read during scan_object + OBJ_PIN(t); jl_set_typetagof(t, jl_datatype_tag, 0); t->hash = 0; t->hasfreetypevars = 0; diff --git a/src/gc-interface.h b/src/gc-interface.h index ebdf26fcdb73d..6055a613c567a 100644 --- a/src/gc-interface.h +++ b/src/gc-interface.h @@ -101,6 +101,10 @@ JL_DLLEXPORT void jl_gc_set_max_memory(uint64_t max_mem); JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection); // Returns whether the thread with `tid` is a collector thread JL_DLLEXPORT int gc_is_collector_thread(int tid) JL_NOTSAFEPOINT; +// Pinning objects; Returns whether the object has been pinned by this call. +JL_DLLEXPORT unsigned char jl_gc_pin_object(void* obj); +// Pinning objects through a potential internal pointer; Returns whether the object has been pinned by this call. +JL_DLLEXPORT unsigned char jl_gc_pin_pointer(void* ptr); // Returns which GC implementation is being used and possibly its version according to the list of supported GCs // NB: it should clearly identify the GC by including e.g. ‘stock’ or ‘mmtk’ as a substring. 
JL_DLLEXPORT const char* jl_gc_active_impl(void); diff --git a/src/gc-mmtk.c b/src/gc-mmtk.c index f4353003943e9..792e83988d4ea 100644 --- a/src/gc-mmtk.c +++ b/src/gc-mmtk.c @@ -313,6 +313,14 @@ JL_DLLEXPORT void jl_gc_prepare_to_collect(void) errno = last_errno; } +JL_DLLEXPORT unsigned char jl_gc_pin_object(void* obj) { + return mmtk_pin_object(obj); +} + +JL_DLLEXPORT unsigned char jl_gc_pin_pointer(void* ptr) { + return mmtk_pin_pointer(ptr); +} + JL_DLLEXPORT void jl_gc_notify_thread_yield(jl_ptls_t ptls, void* ctx) { if (ctx == NULL) { // Save the context for the thread as it was running at the time of the call @@ -496,35 +504,292 @@ static void add_node_to_tpinned_roots_buffer(RootsWorkClosure* closure, RootsWor } } -JL_DLLEXPORT void jl_gc_scan_vm_specific_roots(RootsWorkClosure* closure) +// staticdata_utils.c +extern jl_array_t *internal_methods; +extern jl_array_t *newly_inferred; +// task.c +extern jl_function_t* task_done_hook_func; + +#define TRACE_GLOBALLY_ROOTED(r) add_node_to_roots_buffer(closure, buf, buf_len, r) + +// This is a list of global variables that are marked with JL_GLOBALLY_ROOTED. We need to make sure that they +// won't be moved. Otherwise, when we access those objects from the C global variables, we may see moved references. 
+void trace_full_globally_rooted(RootsWorkClosure* closure, RootsWorkBuffer* buf, size_t* buf_len) { - // Create a new buf - RootsWorkBuffer buf = (closure->report_nodes_func)((void**)0, 0, 0, closure->data, true); - size_t len = 0; + TRACE_GLOBALLY_ROOTED(cmpswap_names); + TRACE_GLOBALLY_ROOTED(jl_typeinf_func); + TRACE_GLOBALLY_ROOTED(_jl_debug_method_invalidation); + // Max 4096 + for (size_t i = 0; i < N_CALL_CACHE; i++) { + TRACE_GLOBALLY_ROOTED(call_cache[i]); + } + // julia_internal.h + TRACE_GLOBALLY_ROOTED(jl_type_type_mt); + TRACE_GLOBALLY_ROOTED(jl_nonfunction_mt); + TRACE_GLOBALLY_ROOTED(jl_kwcall_mt); + TRACE_GLOBALLY_ROOTED(jl_opaque_closure_method); + TRACE_GLOBALLY_ROOTED(jl_nulldebuginfo); + TRACE_GLOBALLY_ROOTED(_jl_debug_method_invalidation); + TRACE_GLOBALLY_ROOTED(jl_module_init_order); + // TRACE_GLOBALLY_ROOTED(jl_current_modules); -- we cannot trace a htable_t. So we trace each module. + for (size_t i = 0; i < jl_current_modules.size; i += 2) { + if (jl_current_modules.table[i + 1] != HT_NOTFOUND) { + TRACE_GLOBALLY_ROOTED(jl_current_modules.table[i]); + } + } + for (size_t i = 0; i < N_CALL_CACHE; i++) { + jl_typemap_entry_t *v = jl_atomic_load_relaxed(&call_cache[i]); + TRACE_GLOBALLY_ROOTED(v); + } + TRACE_GLOBALLY_ROOTED(jl_precompile_toplevel_module); + TRACE_GLOBALLY_ROOTED(jl_global_roots_list); + TRACE_GLOBALLY_ROOTED(jl_global_roots_keyset); + TRACE_GLOBALLY_ROOTED(precompile_field_replace); + // julia.h + TRACE_GLOBALLY_ROOTED(jl_typeofbottom_type); + TRACE_GLOBALLY_ROOTED(jl_datatype_type); + TRACE_GLOBALLY_ROOTED(jl_uniontype_type); + TRACE_GLOBALLY_ROOTED(jl_unionall_type); + TRACE_GLOBALLY_ROOTED(jl_tvar_type); + + TRACE_GLOBALLY_ROOTED(jl_any_type); + TRACE_GLOBALLY_ROOTED(jl_type_type); + TRACE_GLOBALLY_ROOTED(jl_typename_type); + TRACE_GLOBALLY_ROOTED(jl_type_typename); + TRACE_GLOBALLY_ROOTED(jl_symbol_type); + TRACE_GLOBALLY_ROOTED(jl_ssavalue_type); + TRACE_GLOBALLY_ROOTED(jl_slotnumber_type); + 
TRACE_GLOBALLY_ROOTED(jl_argument_type); + TRACE_GLOBALLY_ROOTED(jl_const_type); + TRACE_GLOBALLY_ROOTED(jl_partial_struct_type); + TRACE_GLOBALLY_ROOTED(jl_partial_opaque_type); + TRACE_GLOBALLY_ROOTED(jl_interconditional_type); + TRACE_GLOBALLY_ROOTED(jl_method_match_type); + TRACE_GLOBALLY_ROOTED(jl_simplevector_type); + TRACE_GLOBALLY_ROOTED(jl_tuple_typename); + TRACE_GLOBALLY_ROOTED(jl_vecelement_typename); + TRACE_GLOBALLY_ROOTED(jl_anytuple_type); + TRACE_GLOBALLY_ROOTED(jl_emptytuple_type); + TRACE_GLOBALLY_ROOTED(jl_anytuple_type_type); + TRACE_GLOBALLY_ROOTED(jl_vararg_type); + TRACE_GLOBALLY_ROOTED(jl_function_type); + TRACE_GLOBALLY_ROOTED(jl_builtin_type); + TRACE_GLOBALLY_ROOTED(jl_opaque_closure_type); + TRACE_GLOBALLY_ROOTED(jl_opaque_closure_typename); + + TRACE_GLOBALLY_ROOTED(jl_bottom_type); + TRACE_GLOBALLY_ROOTED(jl_method_instance_type); + TRACE_GLOBALLY_ROOTED(jl_code_instance_type); + TRACE_GLOBALLY_ROOTED(jl_code_info_type); + TRACE_GLOBALLY_ROOTED(jl_debuginfo_type); + TRACE_GLOBALLY_ROOTED(jl_method_type); + TRACE_GLOBALLY_ROOTED(jl_module_type); + TRACE_GLOBALLY_ROOTED(jl_addrspace_type); + TRACE_GLOBALLY_ROOTED(jl_addrspacecore_type); + TRACE_GLOBALLY_ROOTED(jl_abstractarray_type); + TRACE_GLOBALLY_ROOTED(jl_densearray_type); + TRACE_GLOBALLY_ROOTED(jl_array_type); + TRACE_GLOBALLY_ROOTED(jl_array_typename); + TRACE_GLOBALLY_ROOTED(jl_genericmemory_type); + TRACE_GLOBALLY_ROOTED(jl_genericmemory_typename); + TRACE_GLOBALLY_ROOTED(jl_genericmemoryref_type); + TRACE_GLOBALLY_ROOTED(jl_genericmemoryref_typename); + TRACE_GLOBALLY_ROOTED(jl_weakref_type); + TRACE_GLOBALLY_ROOTED(jl_abstractstring_type); + TRACE_GLOBALLY_ROOTED(jl_string_type); + TRACE_GLOBALLY_ROOTED(jl_errorexception_type); + TRACE_GLOBALLY_ROOTED(jl_argumenterror_type); + TRACE_GLOBALLY_ROOTED(jl_loaderror_type); + TRACE_GLOBALLY_ROOTED(jl_initerror_type); + TRACE_GLOBALLY_ROOTED(jl_typeerror_type); + TRACE_GLOBALLY_ROOTED(jl_methoderror_type); + 
TRACE_GLOBALLY_ROOTED(jl_undefvarerror_type); + TRACE_GLOBALLY_ROOTED(jl_fielderror_type); + TRACE_GLOBALLY_ROOTED(jl_atomicerror_type); + TRACE_GLOBALLY_ROOTED(jl_missingcodeerror_type); + TRACE_GLOBALLY_ROOTED(jl_lineinfonode_type); + TRACE_GLOBALLY_ROOTED(jl_stackovf_exception); + TRACE_GLOBALLY_ROOTED(jl_memory_exception); + TRACE_GLOBALLY_ROOTED(jl_readonlymemory_exception); + TRACE_GLOBALLY_ROOTED(jl_diverror_exception); + TRACE_GLOBALLY_ROOTED(jl_undefref_exception); + TRACE_GLOBALLY_ROOTED(jl_interrupt_exception); + TRACE_GLOBALLY_ROOTED(jl_precompilable_error); + TRACE_GLOBALLY_ROOTED(jl_boundserror_type); + TRACE_GLOBALLY_ROOTED(jl_an_empty_vec_any); + TRACE_GLOBALLY_ROOTED(jl_an_empty_memory_any); + TRACE_GLOBALLY_ROOTED(jl_an_empty_string); + + TRACE_GLOBALLY_ROOTED(jl_bool_type); + TRACE_GLOBALLY_ROOTED(jl_char_type); + TRACE_GLOBALLY_ROOTED(jl_int8_type); + TRACE_GLOBALLY_ROOTED(jl_uint8_type); + TRACE_GLOBALLY_ROOTED(jl_int16_type); + TRACE_GLOBALLY_ROOTED(jl_uint16_type); + TRACE_GLOBALLY_ROOTED(jl_int32_type); + TRACE_GLOBALLY_ROOTED(jl_uint32_type); + TRACE_GLOBALLY_ROOTED(jl_int64_type); + TRACE_GLOBALLY_ROOTED(jl_uint64_type); + TRACE_GLOBALLY_ROOTED(jl_float16_type); + TRACE_GLOBALLY_ROOTED(jl_float32_type); + TRACE_GLOBALLY_ROOTED(jl_float64_type); + TRACE_GLOBALLY_ROOTED(jl_floatingpoint_type); + TRACE_GLOBALLY_ROOTED(jl_number_type); + TRACE_GLOBALLY_ROOTED(jl_void_type); // deprecated + TRACE_GLOBALLY_ROOTED(jl_nothing_type); + TRACE_GLOBALLY_ROOTED(jl_signed_type); + TRACE_GLOBALLY_ROOTED(jl_voidpointer_type); + TRACE_GLOBALLY_ROOTED(jl_uint8pointer_type); + TRACE_GLOBALLY_ROOTED(jl_pointer_type); + TRACE_GLOBALLY_ROOTED(jl_llvmpointer_type); + TRACE_GLOBALLY_ROOTED(jl_ref_type); + TRACE_GLOBALLY_ROOTED(jl_pointer_typename); + TRACE_GLOBALLY_ROOTED(jl_llvmpointer_typename); + TRACE_GLOBALLY_ROOTED(jl_namedtuple_typename); + TRACE_GLOBALLY_ROOTED(jl_namedtuple_type); + TRACE_GLOBALLY_ROOTED(jl_task_type); + 
TRACE_GLOBALLY_ROOTED(jl_pair_type); + + TRACE_GLOBALLY_ROOTED(jl_array_uint8_type); + TRACE_GLOBALLY_ROOTED(jl_array_any_type); + TRACE_GLOBALLY_ROOTED(jl_array_symbol_type); + TRACE_GLOBALLY_ROOTED(jl_array_int32_type); + TRACE_GLOBALLY_ROOTED(jl_array_uint32_type); + TRACE_GLOBALLY_ROOTED(jl_array_uint64_type); + TRACE_GLOBALLY_ROOTED(jl_memory_uint8_type); + TRACE_GLOBALLY_ROOTED(jl_memory_uint16_type); + TRACE_GLOBALLY_ROOTED(jl_memory_uint32_type); + TRACE_GLOBALLY_ROOTED(jl_memory_uint64_type); + TRACE_GLOBALLY_ROOTED(jl_memory_any_type); + TRACE_GLOBALLY_ROOTED(jl_memoryref_uint8_type); + TRACE_GLOBALLY_ROOTED(jl_memoryref_any_type); + TRACE_GLOBALLY_ROOTED(jl_expr_type); + TRACE_GLOBALLY_ROOTED(jl_binding_type); + TRACE_GLOBALLY_ROOTED(jl_binding_partition_type); + TRACE_GLOBALLY_ROOTED(jl_globalref_type); + TRACE_GLOBALLY_ROOTED(jl_linenumbernode_type); + TRACE_GLOBALLY_ROOTED(jl_gotonode_type); + TRACE_GLOBALLY_ROOTED(jl_gotoifnot_type); + TRACE_GLOBALLY_ROOTED(jl_enternode_type); + TRACE_GLOBALLY_ROOTED(jl_returnnode_type); + TRACE_GLOBALLY_ROOTED(jl_phinode_type); + TRACE_GLOBALLY_ROOTED(jl_pinode_type); + TRACE_GLOBALLY_ROOTED(jl_phicnode_type); + TRACE_GLOBALLY_ROOTED(jl_upsilonnode_type); + TRACE_GLOBALLY_ROOTED(jl_quotenode_type); + TRACE_GLOBALLY_ROOTED(jl_newvarnode_type); + TRACE_GLOBALLY_ROOTED(jl_intrinsic_type); + TRACE_GLOBALLY_ROOTED(jl_methtable_type); + TRACE_GLOBALLY_ROOTED(jl_typemap_level_type); + TRACE_GLOBALLY_ROOTED(jl_typemap_entry_type); + + TRACE_GLOBALLY_ROOTED(jl_emptysvec); + TRACE_GLOBALLY_ROOTED(jl_emptytuple); + TRACE_GLOBALLY_ROOTED(jl_true); + TRACE_GLOBALLY_ROOTED(jl_false); + TRACE_GLOBALLY_ROOTED(jl_nothing); + TRACE_GLOBALLY_ROOTED(jl_kwcall_func); + + TRACE_GLOBALLY_ROOTED(jl_libdl_dlopen_func); + + TRACE_GLOBALLY_ROOTED(jl_main_module); + TRACE_GLOBALLY_ROOTED(jl_core_module); + TRACE_GLOBALLY_ROOTED(jl_base_module); + TRACE_GLOBALLY_ROOTED(jl_top_module); + TRACE_GLOBALLY_ROOTED(jl_libdl_module); + + // 
staticdata_utils.c + TRACE_GLOBALLY_ROOTED(internal_methods); + TRACE_GLOBALLY_ROOTED(newly_inferred); + // task.c + TRACE_GLOBALLY_ROOTED(task_done_hook_func); + // threading.c + // TRACE_GLOBALLY_ROOTED(jl_all_tls_states); -- we don't need to pin these. Julia TLS are allocated with calloc. +} + +// These are from gc_mark_roots -- this is not enough for a moving GC. We need to make sure +// all the globally rooted symbols are traced and will not move. This function is unused. +// We use trace_full_globally_rooted() instead. +void trace_partial_globally_rooted(RootsWorkClosure* closure, RootsWorkBuffer* buf, size_t* buf_len) +{ // add module - add_node_to_roots_buffer(closure, &buf, &len, jl_main_module); + TRACE_GLOBALLY_ROOTED(jl_main_module); // buildin values - add_node_to_roots_buffer(closure, &buf, &len, jl_an_empty_vec_any); - add_node_to_roots_buffer(closure, &buf, &len, jl_module_init_order); + TRACE_GLOBALLY_ROOTED(jl_an_empty_vec_any); + TRACE_GLOBALLY_ROOTED(jl_module_init_order); for (size_t i = 0; i < jl_current_modules.size; i += 2) { if (jl_current_modules.table[i + 1] != HT_NOTFOUND) { - add_node_to_roots_buffer(closure, &buf, &len, jl_current_modules.table[i]); + TRACE_GLOBALLY_ROOTED(jl_current_modules.table[i]); } } - add_node_to_roots_buffer(closure, &buf, &len, jl_anytuple_type_type); + TRACE_GLOBALLY_ROOTED(jl_anytuple_type_type); for (size_t i = 0; i < N_CALL_CACHE; i++) { - jl_typemap_entry_t *v = jl_atomic_load_relaxed(&call_cache[i]); - add_node_to_roots_buffer(closure, &buf, &len, v); + jl_typemap_entry_t *v = jl_atomic_load_relaxed(&call_cache[i]); + TRACE_GLOBALLY_ROOTED(v); } - add_node_to_roots_buffer(closure, &buf, &len, _jl_debug_method_invalidation); + TRACE_GLOBALLY_ROOTED(_jl_debug_method_invalidation); // constants - add_node_to_roots_buffer(closure, &buf, &len, jl_emptytuple_type); - add_node_to_roots_buffer(closure, &buf, &len, cmpswap_names); + TRACE_GLOBALLY_ROOTED(jl_emptytuple_type); + 
TRACE_GLOBALLY_ROOTED(cmpswap_names); + TRACE_GLOBALLY_ROOTED(jl_global_roots_list); + TRACE_GLOBALLY_ROOTED(jl_global_roots_keyset); + TRACE_GLOBALLY_ROOTED(precompile_field_replace); +} + +JL_DLLEXPORT void jl_gc_scan_vm_specific_roots(RootsWorkClosure* closure) +{ + // Create a new buf + RootsWorkBuffer buf = (closure->report_nodes_func)((void**)0, 0, 0, closure->data, true); + size_t len = 0; + + // globally rooted + trace_full_globally_rooted(closure, &buf, &len); + + // Simply pin things in global roots table + size_t i; + // for (i = 0; i < jl_array_len(jl_global_roots_table); i++) { + // jl_value_t* root = jl_array_ptr_ref(jl_global_roots_table, i); + // add_node_to_roots_buffer(closure, &buf, &len, root); + // } + // for (i = 0; i < jl_global_roots_list->length; i++) { + // jl_value_t* root = jl_genericmemory_ptr_ref(jl_global_roots_list, i); + // add_node_to_roots_buffer(closure, &buf, &len, root); + // } + // for (i = 0; i < jl_global_roots_keyset->length; i++) { + // jl_value_t* root = jl_genericmemory_ptr_ref(jl_global_roots_keyset, i); + // add_node_to_roots_buffer(closure, &buf, &len, root); + // } + // add_node_to_roots_buffer(closure, &buf, &len, jl_global_roots_list); + // add_node_to_roots_buffer(closure, &buf, &len, jl_global_roots_keyset); + + // // add module + // add_node_to_roots_buffer(closure, &buf, &len, jl_main_module); + + // // buildin values + // add_node_to_roots_buffer(closure, &buf, &len, jl_an_empty_vec_any); + // add_node_to_roots_buffer(closure, &buf, &len, jl_module_init_order); + // for (size_t i = 0; i < jl_current_modules.size; i += 2) { + // if (jl_current_modules.table[i + 1] != HT_NOTFOUND) { + // add_node_to_roots_buffer(closure, &buf, &len, jl_current_modules.table[i]); + // } + // } + // add_node_to_roots_buffer(closure, &buf, &len, jl_anytuple_type_type); + // for (size_t i = 0; i < N_CALL_CACHE; i++) { + // jl_typemap_entry_t *v = jl_atomic_load_relaxed(&call_cache[i]); + // add_node_to_roots_buffer(closure, &buf, 
&len, v); + // } + // add_node_to_roots_buffer(closure, &buf, &len, _jl_debug_method_invalidation); + + // // constants + // add_node_to_roots_buffer(closure, &buf, &len, jl_emptytuple_type); + // add_node_to_roots_buffer(closure, &buf, &len, cmpswap_names); + // add_node_to_roots_buffer(closure, &buf, &len, precompile_field_replace); // jl_global_roots_table must be transitively pinned + // FIXME: We need to remove transitive pinning of global roots. Otherwise they may pin most of the objects in the heap. RootsWorkBuffer tpinned_buf = (closure->report_tpinned_nodes_func)((void**)0, 0, 0, closure->data, true); size_t tpinned_len = 0; add_node_to_tpinned_roots_buffer(closure, &tpinned_buf, &tpinned_len, jl_global_roots_list); @@ -789,6 +1054,8 @@ JL_DLLEXPORT jl_weakref_t *jl_gc_new_weakref_th(jl_ptls_t ptls, jl_value_t *valu { jl_weakref_t *wr = (jl_weakref_t*)jl_gc_alloc(ptls, sizeof(void*), jl_weakref_type); wr->value = value; // NOTE: wb not needed here + // Note: we are using MMTk's weak ref processing. If we switch to Julia's weak ref processing, + // we need to make sure the value and the weak ref won't be moved (e.g. pin them) mmtk_add_weak_candidate(wr); return wr; } diff --git a/src/gc-stock.c b/src/gc-stock.c index 6e4f14e5412f2..0ee7c9e6aeabb 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -4085,6 +4085,8 @@ JL_DLLEXPORT void jl_gc_schedule_foreign_sweepfunc(jl_ptls_t ptls, jl_value_t *o arraylist_push(&ptls->gc_tls.sweep_objs, obj); } +// added for MMTk integration + void jl_gc_notify_image_load(const char* img_data, size_t len) { // Do nothing @@ -4099,6 +4101,14 @@ JL_DLLEXPORT const char* jl_gc_active_impl(void) { return "Built with stock GC"; } +JL_DLLEXPORT unsigned char jl_gc_pin_object(void* obj) { + return 0; +} + +JL_DLLEXPORT unsigned char jl_gc_pin_pointer(void* ptr) { + return 0; +} + JL_DLLEXPORT void jl_gc_preserve_begin_hook(int n, ...) 
JL_NOTSAFEPOINT { jl_unreachable(); diff --git a/src/genericmemory.c b/src/genericmemory.c index 4180f8f58ebdd..1a576f9f44763 100644 --- a/src/genericmemory.c +++ b/src/genericmemory.c @@ -41,6 +41,8 @@ JL_DLLEXPORT jl_genericmemory_t *jl_alloc_genericmemory_unchecked(jl_ptls_t ptls m = (jl_genericmemory_t*)jl_gc_alloc(ptls, tot, mtype); if (pooled) { data = (char*)m + JL_SMALL_BYTE_ALIGNMENT; + // Data is inlined and ptr is an internal pointer. We pin the object so the ptr will not be invalid. + OBJ_PIN(m); } else { int isaligned = 1; // jl_gc_managed_malloc is always aligned @@ -111,6 +113,7 @@ JL_DLLEXPORT jl_genericmemory_t *jl_string_to_genericmemory(jl_value_t *str) m->length = jl_string_len(str); m->ptr = jl_string_data(str); jl_genericmemory_data_owner_field(m) = str; + OBJ_PIN(str); return m; } @@ -166,7 +169,7 @@ JL_DLLEXPORT jl_genericmemory_t *jl_ptr_to_genericmemory(jl_value_t *mtype, void m->length = nel; jl_genericmemory_data_owner_field(m) = own_buffer ? (jl_value_t*)m : NULL; if (own_buffer) { - // FIXME: PTR_PIN(m); + OBJ_PIN(m); int isaligned = 0; // TODO: allow passing memalign'd buffers jl_gc_track_malloced_genericmemory(ct->ptls, m, isaligned); size_t allocated_bytes = memory_block_usable_size(data, isaligned); diff --git a/src/gf.c b/src/gf.c index 710dda208f0b2..3d4dc934a7df8 100644 --- a/src/gf.c +++ b/src/gf.c @@ -645,6 +645,8 @@ JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst( jl_atomic_store_relaxed(&codeinst->next, NULL); jl_atomic_store_relaxed(&codeinst->ipo_purity_bits, effects); codeinst->analysis_results = analysis_results; + // Pin codeinst, as they are referenced by vectors and maps in _jl_codegen_params_t + OBJ_PIN(codeinst); return codeinst; } diff --git a/src/ircode.c b/src/ircode.c index 99c5833ac3be7..5fd25e316919b 100644 --- a/src/ircode.c +++ b/src/ircode.c @@ -1661,12 +1661,14 @@ void jl_init_serializer(void) assert(LAST_TAG+1+i < 256); for (i = 2; i < 256; i++) { - if (deser_tag[i]) + if (deser_tag[i]) { + 
OBJHASH_PIN(deser_tag[i]) ptrhash_put(&ser_tag, deser_tag[i], (void*)i); } i = 2; while (common_symbols[i-2] != NULL) { + OBJHASH_PIN(common_symbols[i-2]) ptrhash_put(&common_symbol_tag, common_symbols[i-2], (void*)i); deser_symbols[i] = (jl_value_t*)common_symbols[i-2]; i += 1; diff --git a/src/jitlayers.h b/src/jitlayers.h index 568ad4f0bccd0..63997a68e2baf 100644 --- a/src/jitlayers.h +++ b/src/jitlayers.h @@ -227,6 +227,7 @@ struct jl_codegen_params_t { typedef StringMap SymMapGV; // outputs jl_workqueue_t workqueue; + // This map may hold Julia obj ref in the native heap. We need to pin the void*. std::map global_targets; jl_array_t *temporary_roots = nullptr; std::map, GlobalVariable*> external_fns; @@ -310,7 +311,7 @@ void add_named_global(StringRef name, void *addr) JL_NOTSAFEPOINT; static inline Constant *literal_static_pointer_val(const void *p, Type *T) JL_NOTSAFEPOINT { - PTR_PIN((void*)p); + PTR_PIN((void*)p); // This may point to non-mmtk heap memory. // this function will emit a static pointer into the generated code // the generated code will only be valid during the current session, // and thus, this should typically be avoided in new API's diff --git a/src/jl_uv.c b/src/jl_uv.c index 3498952622dce..3ab1961457918 100644 --- a/src/jl_uv.c +++ b/src/jl_uv.c @@ -469,6 +469,7 @@ JL_DLLEXPORT void jl_forceclose_uv(uv_handle_t *handle) JL_DLLEXPORT void jl_uv_associate_julia_struct(uv_handle_t *handle, jl_value_t *data) { + OBJ_PIN(data); handle->data = data; } @@ -479,6 +480,7 @@ JL_DLLEXPORT void jl_uv_associate_julia_struct(uv_handle_t *handle, */ JL_DLLEXPORT void jl_uv_disassociate_julia_struct(uv_handle_t *handle) { + // TODO: unpin here -- we need to implement pin count before we can unpin objects. 
handle->data = NULL; } diff --git a/src/julia.h b/src/julia.h index 660ec02ef8e99..17b226133349d 100644 --- a/src/julia.h +++ b/src/julia.h @@ -84,6 +84,19 @@ typedef struct _jl_value_t jl_value_t; extern "C" { #endif +// object pinning ------------------------------------------------------------ + +// FIXME: Pinning objects that get hashed in the ptrhash table +// until we implement address space hashing. +#define OBJHASH_PIN(key) if (key) jl_gc_pin_object(key); +#define PTRHASH_PIN(key) if (key) jl_gc_pin_pointer(key); + +// Called when pinning objects that would cause an error if moved +// The difference: the argument for pin_object needs to point to an object (jl_value_t*), +// but the argument for pin_pointer can be an internal pointer. +#define OBJ_PIN(key) if (key) jl_gc_pin_object(key); +#define PTR_PIN(key) if (key) jl_gc_pin_pointer(key); + // core data types ------------------------------------------------------------ struct _jl_taggedvalue_bits { diff --git a/src/method.c b/src/method.c index 8a14eb00182b1..14fea906e0270 100644 --- a/src/method.c +++ b/src/method.c @@ -561,6 +561,8 @@ JL_DLLEXPORT jl_method_instance_t *jl_new_method_instance_uninit(void) jl_atomic_store_relaxed(&mi->cache, NULL); mi->cache_with_orig = 0; jl_atomic_store_relaxed(&mi->flags, 0); + // jl_method_instance_t needs to be pinned, as it is referenced in a map in JITDebugInfoRegistry + OBJ_PIN(mi); return mi; } diff --git a/src/module.c b/src/module.c index b2a4018519fca..dae56a9cd27c8 100644 --- a/src/module.c +++ b/src/module.c @@ -110,6 +110,7 @@ JL_DLLEXPORT jl_module_t *jl_new_module_(jl_sym_t *name, jl_module_t *parent, ui jl_module_public(m, name, 1); JL_GC_POP(); } + OBJ_PIN(m); // modules are referenced in jl_current_modules (htable). They cannot move. 
return m; } diff --git a/src/runtime_ccall.cpp b/src/runtime_ccall.cpp index 2a6cb00961594..4bc26a7d716c4 100644 --- a/src/runtime_ccall.cpp +++ b/src/runtime_ccall.cpp @@ -320,6 +320,8 @@ jl_value_t *jl_get_cfunction_trampoline( tramp = trampoline_alloc(); ((void**)result)[0] = tramp; init_trampoline(tramp, nval); + OBJHASH_PIN((void*)fobj) + OBJHASH_PIN(result) ptrhash_put(cache, (void*)fobj, result); uv_mutex_unlock(&trampoline_lock); return result; diff --git a/src/staticdata.c b/src/staticdata.c index 26cf130bf5ce2..64f71a0df46c9 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -741,7 +741,9 @@ static int needs_uniquing(jl_value_t *v) JL_NOTSAFEPOINT static void record_field_change(jl_value_t **addr, jl_value_t *newval) JL_NOTSAFEPOINT { - if (*addr != newval) + if (*addr != newval) { + OBJHASH_PIN((void*)addr) + OBJHASH_PIN((void*)newval) ptrhash_put(&field_replace, (void*)addr, newval); } @@ -2523,6 +2525,7 @@ static jl_svec_t *jl_prune_type_cache_hash(jl_svec_t *cache) JL_GC_DISABLED assert(serialization_queue.items[from_seroder_entry(idx)] == cache); cache = cache_rehash_set(cache, sz); // redirect all references to the old cache to relocate to the new cache object + OBJHASH_PIN((void*)cache) ptrhash_put(&serialization_order, cache, idx); serialization_queue.items[from_seroder_entry(idx)] = cache; return cache; @@ -3721,6 +3724,7 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl assert(tag == 0); arraylist_push(&delay_list, obj); arraylist_push(&delay_list, pfld); + OBJHASH_PIN(obj) ptrhash_put(&new_dt_objs, (void*)obj, obj); // mark obj as invalid *pfld = (uintptr_t)NULL; continue; @@ -3755,6 +3759,8 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image, jl } static_assert(offsetof(jl_datatype_t, name) == 0, ""); newdt->name = dt->name; + OBJHASH_PIN(newdt) + OBJHASH_PIN(dt) ptrhash_put(&new_dt_objs, (void*)newdt, dt); } else { diff --git a/src/staticdata_utils.c b/src/staticdata_utils.c 
index 1985357321a3a..dbd1f0f125dc3 100644 --- a/src/staticdata_utils.c +++ b/src/staticdata_utils.c @@ -82,7 +82,7 @@ static uint64_t jl_worklist_key(jl_array_t *worklist) JL_NOTSAFEPOINT return 0; } -static jl_array_t *newly_inferred JL_GLOBALLY_ROOTED /*FIXME*/; +jl_array_t *newly_inferred JL_GLOBALLY_ROOTED /*FIXME*/; // Mutex for newly_inferred jl_mutex_t newly_inferred_mutex; extern jl_mutex_t world_counter_lock; @@ -272,6 +272,7 @@ static void jl_collect_new_roots(jl_array_t *roots, jl_array_t *new_ext_cis, uin assert(jl_is_code_instance(ci)); jl_method_t *m = jl_get_ci_mi(ci)->def.method; assert(jl_is_method(m)); + OBJHASH_PIN(m) ptrhash_put(&mset, (void*)m, (void*)m); } int nwithkey; diff --git a/src/task.c b/src/task.c index 37e7f0e1f5440..d49a8c68e2aa3 100644 --- a/src/task.c +++ b/src/task.c @@ -307,7 +307,7 @@ CFI_NORETURN #endif /* Rooted by the base module */ -static _Atomic(jl_function_t*) task_done_hook_func JL_GLOBALLY_ROOTED = NULL; +_Atomic(jl_function_t*) task_done_hook_func JL_GLOBALLY_ROOTED = NULL; void JL_NORETURN jl_finish_task(jl_task_t *ct) { @@ -1074,6 +1074,8 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion jl_task_t *ct = jl_current_task; jl_task_t *t = (jl_task_t*)jl_gc_alloc(ct->ptls, sizeof(jl_task_t), jl_task_type); jl_set_typetagof(t, jl_task_tag, 0); + // Task cannot be moved, as jl_mutex_t (as globals) references tasks + OBJ_PIN(t); JL_PROBE_RT_NEW_TASK(ct, t); t->ctx.copy_stack = 0; if (ssize == 0) { @@ -1105,6 +1107,12 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion t->start = start; t->result = jl_nothing; t->donenotify = completion_future; + // completion_future is a GenericCondition with SpinLock. + // I am not sure why we have to pin this. But, if we don't pin it, + // it may get moved, and we still use the invalid old reference somehow. + // See https://github.com/mmtk/mmtk-julia/issues/179. 
+ // TODO: We should understand where we get the invalid reference from. + OBJ_PIN(completion_future); jl_atomic_store_relaxed(&t->_isexception, 0); // Inherit scope from parent task t->scope = ct->scope; @@ -1538,6 +1546,8 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi) jl_nothing = jl_gc_permobj(0, jl_nothing_type); jl_task_t *ct = (jl_task_t*)jl_gc_alloc(ptls, sizeof(jl_task_t), jl_task_type); jl_set_typetagof(ct, jl_task_tag, 0); + // Task cannot be moved, as jl_mutex_t (as globals) references tasks + OBJ_PIN(ct); memset(ct, 0, sizeof(jl_task_t)); void *stack = stack_lo; size_t ssize = (char*)stack_hi - (char*)stack_lo; diff --git a/src/toplevel.c b/src/toplevel.c index cdd390b9b49ed..827882444410f 100644 --- a/src/toplevel.c +++ b/src/toplevel.c @@ -139,6 +139,7 @@ static jl_value_t *jl_eval_module_expr(jl_module_t *parent_module, jl_expr_t *ex jl_value_t *form = (jl_value_t*)newm; JL_GC_PUSH1(&form); JL_LOCK(&jl_modules_mutex); + OBJHASH_PIN(newm) ptrhash_put(&jl_current_modules, (void*)newm, (void*)((uintptr_t)HT_NOTFOUND + 1)); JL_UNLOCK(&jl_modules_mutex); From 2e66ea9e3599123fbc9c530c48381b063ccd0368 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Thu, 6 Feb 2025 17:28:12 +1300 Subject: [PATCH 09/11] Introduce non moving alloc (#86) This PR introduces a non moving alloc function, and use it for certain types that are not supposed to be moved. 
--- src/datatype.c | 10 ++-------- src/gc-common.c | 5 +++++ src/gc-interface.h | 3 +++ src/gc-mmtk.c | 9 +++++++++ src/gc-stock.c | 6 ++++++ src/gf.c | 4 +--- src/julia.h | 12 ++++++++++++ src/julia_internal.h | 5 +++++ src/julia_threads.h | 1 + src/method.c | 4 +--- src/module.c | 3 +-- src/task.c | 8 ++------ 12 files changed, 48 insertions(+), 22 deletions(-) diff --git a/src/datatype.c b/src/datatype.c index 641591ea69a98..1fb95b44a60ab 100644 --- a/src/datatype.c +++ b/src/datatype.c @@ -62,11 +62,8 @@ JL_DLLEXPORT jl_typename_t *jl_new_typename_in(jl_sym_t *name, jl_module_t *modu { jl_task_t *ct = jl_current_task; jl_typename_t *tn = - (jl_typename_t*)jl_gc_alloc(ct->ptls, sizeof(jl_typename_t), + (jl_typename_t*)jl_gc_alloc_nonmoving(ct->ptls, sizeof(jl_typename_t), jl_typename_type); - // Typenames should be pinned since they are used as metadata, and are - // read during scan_object - OBJ_PIN(tn); tn->name = name; tn->module = module; tn->wrapper = NULL; @@ -98,10 +95,7 @@ jl_datatype_t *jl_new_abstracttype(jl_value_t *name, jl_module_t *module, jl_dat jl_datatype_t *jl_new_uninitialized_datatype(void) { jl_task_t *ct = jl_current_task; - jl_datatype_t *t = (jl_datatype_t*)jl_gc_alloc(ct->ptls, sizeof(jl_datatype_t), jl_datatype_type); - // Types should be pinned since they are used as metadata, and are - // read during scan_object - OBJ_PIN(t); + jl_datatype_t *t = (jl_datatype_t*)jl_gc_alloc_nonmoving(ct->ptls, sizeof(jl_datatype_t), jl_datatype_type); jl_set_typetagof(t, jl_datatype_tag, 0); t->hash = 0; t->hasfreetypevars = 0; diff --git a/src/gc-common.c b/src/gc-common.c index c07b707b17709..0816db696fdb0 100644 --- a/src/gc-common.c +++ b/src/gc-common.c @@ -540,6 +540,11 @@ JL_DLLEXPORT jl_value_t *(jl_gc_alloc)(jl_ptls_t ptls, size_t sz, void *ty) return jl_gc_alloc_(ptls, sz, ty); } +JL_DLLEXPORT jl_value_t *(jl_gc_alloc_nonmoving)(jl_ptls_t ptls, size_t sz, void *ty) +{ + return jl_gc_alloc_nonmoving_(ptls, sz, ty); +} + JL_DLLEXPORT void 
*jl_malloc(size_t sz) { return jl_gc_counted_malloc(sz); diff --git a/src/gc-interface.h b/src/gc-interface.h index 6055a613c567a..81a43e4d2eef6 100644 --- a/src/gc-interface.h +++ b/src/gc-interface.h @@ -162,6 +162,9 @@ JL_DLLEXPORT uint64_t jl_gc_total_hrtime(void); // **must** also set the type of the returning object to be `ty`. The type `ty` may also be used to record // an allocation of that type in the allocation profiler. struct _jl_value_t *jl_gc_alloc_(struct _jl_tls_states_t * ptls, size_t sz, void *ty); +// Similar to jl_gc_alloc_, except that the GC needs to make sure the object allocated from this function will +// not be moved by the GC. +struct _jl_value_t *jl_gc_alloc_nonmoving_(struct _jl_tls_states_t * ptls, size_t sz, void *ty); // Allocates small objects and increments Julia allocation counterst. Size of the object // header must be included in the object size. The (possibly unused in some implementations) // offset to the arena in which we're allocating is passed in the second parameter, and the diff --git a/src/gc-mmtk.c b/src/gc-mmtk.c index 792e83988d4ea..66a2c8ebf4fb0 100644 --- a/src/gc-mmtk.c +++ b/src/gc-mmtk.c @@ -1243,6 +1243,15 @@ inline jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) return v; } +inline jl_value_t *jl_gc_alloc_nonmoving_(jl_ptls_t ptls, size_t sz, void *ty) +{ + // TODO: Currently we just alloc and pin the object. We may use a + // different non moving allocator instead. 
+ jl_value_t *v = jl_gc_alloc_(ptls, sz, ty); + OBJ_PIN(v); + return v; +} + // allocation wrappers that track allocation and let collection run JL_DLLEXPORT void *jl_gc_counted_malloc(size_t sz) { diff --git a/src/gc-stock.c b/src/gc-stock.c index 0ee7c9e6aeabb..8f67e5d7d4ea4 100644 --- a/src/gc-stock.c +++ b/src/gc-stock.c @@ -806,6 +806,12 @@ inline jl_value_t *jl_gc_alloc_(jl_ptls_t ptls, size_t sz, void *ty) return v; } +inline jl_value_t *jl_gc_alloc_nonmoving_(jl_ptls_t ptls, size_t sz, void *ty) +{ + // Just use the normal allocation, as the GC won't move objects anyway. + return jl_gc_alloc_(ptls, sz, ty); +} + int jl_gc_classify_pools(size_t sz, int *osize) { if (sz > GC_MAX_SZCLASS) diff --git a/src/gf.c b/src/gf.c index 3d4dc934a7df8..50b02bbb0ad39 100644 --- a/src/gf.c +++ b/src/gf.c @@ -620,7 +620,7 @@ JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst( assert(min_world <= max_world && "attempting to set invalid world constraints"); //assert((!jl_is_method(mi->def.value) || max_world != ~(size_t)0 || min_world <= 1 || edges == NULL || jl_svec_len(edges) != 0) && "missing edges"); jl_task_t *ct = jl_current_task; - jl_code_instance_t *codeinst = (jl_code_instance_t*)jl_gc_alloc(ct->ptls, sizeof(jl_code_instance_t), + jl_code_instance_t *codeinst = (jl_code_instance_t*)jl_gc_alloc_nonmoving(ct->ptls, sizeof(jl_code_instance_t), jl_code_instance_type); codeinst->def = (jl_value_t*)mi; codeinst->owner = owner; @@ -645,8 +645,6 @@ JL_DLLEXPORT jl_code_instance_t *jl_new_codeinst( jl_atomic_store_relaxed(&codeinst->next, NULL); jl_atomic_store_relaxed(&codeinst->ipo_purity_bits, effects); codeinst->analysis_results = analysis_results; - // Pin codeinst, as they are referenced by vectors and maps in _jl_codegen_params_t - OBJ_PIN(codeinst); return codeinst; } diff --git a/src/julia.h b/src/julia.h index 17b226133349d..011802820be94 100644 --- a/src/julia.h +++ b/src/julia.h @@ -77,6 +77,13 @@ typedef struct _jl_tls_states_t *jl_ptls_t; // the common fields are 
hidden before the pointer, but the following macro is // used to indicate which types below are subtypes of jl_value_t #define JL_DATA_TYPE +// Objects of a type that is JL_NON_MOVING should be allocated with +// jl_gc_alloc_nonmoving so they will never be moved by GC. +// Those types are usually frequently referenced by the runtime. +// It is basically a trade-off between allocating the objects as non-moving +// and pinning the objects after allocation. If objects of certain types are +// most likely to be pinned, it is a good idea to just allocate them as non moving. +#define JL_NON_MOVING typedef struct _jl_value_t jl_value_t; #include "julia_threads.h" @@ -404,6 +411,7 @@ typedef struct _jl_method_t { // can can be used as a unique dictionary key representation of a call to a particular Method // with a particular set of argument types struct _jl_method_instance_t { + JL_NON_MOVING // Non moving, as it is referenced in a map in JITDebugInfoRegistry JL_DATA_TYPE union { jl_value_t *value; // generic accessor @@ -436,6 +444,7 @@ typedef struct _jl_opaque_closure_t { // This type represents an executable operation typedef struct _jl_code_instance_t { + JL_NON_MOVING // Pin codeinst, as they are referenced by vectors and maps in _jl_codegen_params_t JL_DATA_TYPE jl_value_t *def; // MethodInstance or ABIOverride jl_value_t *owner; // Compiler token this belongs to, `jl_nothing` is reserved for native @@ -515,6 +524,7 @@ typedef struct { // of a type and storing all data common to different instantiations of the type, // including a cache for hash-consed allocation of DataType objects. typedef struct { + JL_NON_MOVING // Typenames should be pinned since they are used as metadata, and are read during scan_object JL_DATA_TYPE jl_sym_t *name; struct _jl_module_t *module; @@ -595,6 +605,7 @@ typedef struct { } jl_datatype_layout_t; typedef struct _jl_datatype_t { + JL_NON_MOVING // Types should not be moved. 
It is also referenced from the native heap in jl_raw_alloc_t. JL_DATA_TYPE jl_typename_t *name; struct _jl_datatype_t *super; @@ -723,6 +734,7 @@ typedef struct { } jl_uuid_t; typedef struct _jl_module_t { + JL_NON_MOVING // modules are referenced in jl_current_modules (htable). They cannot move. JL_DATA_TYPE jl_sym_t *name; struct _jl_module_t *parent; diff --git a/src/julia_internal.h b/src/julia_internal.h index 9817c8cc8263b..bb96b79f904bc 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -560,12 +560,17 @@ static_assert(ARRAY_CACHE_ALIGN_THRESHOLD > GC_MAX_SZCLASS, ""); * safepoints will be caught by the GC analyzer. */ JL_DLLEXPORT jl_value_t *jl_gc_alloc(jl_ptls_t ptls, size_t sz, void *ty); +JL_DLLEXPORT jl_value_t *jl_gc_alloc_nonmoving(jl_ptls_t ptls, size_t sz, void *ty); // On GCC, only inline when sz is constant #ifdef __GNUC__ # define jl_gc_alloc(ptls, sz, ty) \ (__builtin_constant_p(sz) ? \ jl_gc_alloc_(ptls, sz, ty) : \ (jl_gc_alloc)(ptls, sz, ty)) +# define jl_gc_alloc_nonmoving(ptls, sz, ty) \ + (__builtin_constant_p(sz) ? 
\ + jl_gc_alloc_nonmoving_(ptls, sz, ty) : \ + (jl_gc_alloc_nonmoving)(ptls, sz, ty)) #else # define jl_gc_alloc(ptls, sz, ty) jl_gc_alloc_(ptls, sz, ty) #endif diff --git a/src/julia_threads.h b/src/julia_threads.h index 427c3fda826cb..5c3edce2056f2 100644 --- a/src/julia_threads.h +++ b/src/julia_threads.h @@ -230,6 +230,7 @@ typedef struct _jl_excstack_t jl_excstack_t; typedef struct _jl_handler_t jl_handler_t; typedef struct _jl_task_t { + JL_NON_MOVING // jl_mutex_t (as globals) references tasks JL_DATA_TYPE jl_value_t *next; // invasive linked list for scheduler jl_value_t *queue; // invasive linked list for scheduler diff --git a/src/method.c b/src/method.c index 14fea906e0270..c77a930aa12ed 100644 --- a/src/method.c +++ b/src/method.c @@ -552,7 +552,7 @@ JL_DLLEXPORT jl_method_instance_t *jl_new_method_instance_uninit(void) { jl_task_t *ct = jl_current_task; jl_method_instance_t *mi = - (jl_method_instance_t*)jl_gc_alloc(ct->ptls, sizeof(jl_method_instance_t), + (jl_method_instance_t*)jl_gc_alloc_nonmoving(ct->ptls, sizeof(jl_method_instance_t), jl_method_instance_type); mi->def.value = NULL; mi->specTypes = NULL; @@ -561,8 +561,6 @@ JL_DLLEXPORT jl_method_instance_t *jl_new_method_instance_uninit(void) jl_atomic_store_relaxed(&mi->cache, NULL); mi->cache_with_orig = 0; jl_atomic_store_relaxed(&mi->flags, 0); - // jl_method_instance_t needs to be pinned, as it is referenced in a map in JITDebugInfoRegistry - OBJ_PIN(mi); return mi; } diff --git a/src/module.c b/src/module.c index dae56a9cd27c8..2cb17a065fc44 100644 --- a/src/module.c +++ b/src/module.c @@ -75,7 +75,7 @@ JL_DLLEXPORT jl_module_t *jl_new_module_(jl_sym_t *name, jl_module_t *parent, ui { jl_task_t *ct = jl_current_task; const jl_uuid_t uuid_zero = {0, 0}; - jl_module_t *m = (jl_module_t*)jl_gc_alloc(ct->ptls, sizeof(jl_module_t), + jl_module_t *m = (jl_module_t*)jl_gc_alloc_nonmoving(ct->ptls, sizeof(jl_module_t), jl_module_type); jl_set_typetagof(m, jl_module_tag, 0); 
assert(jl_is_symbol(name)); @@ -110,7 +110,6 @@ JL_DLLEXPORT jl_module_t *jl_new_module_(jl_sym_t *name, jl_module_t *parent, ui jl_module_public(m, name, 1); JL_GC_POP(); } - OBJ_PIN(m); // modules are referenced in jl_current_modules (htable). They cannot move. return m; } diff --git a/src/task.c b/src/task.c index d49a8c68e2aa3..e6458280d93f7 100644 --- a/src/task.c +++ b/src/task.c @@ -1072,10 +1072,8 @@ void jl_rng_split(uint64_t dst[JL_RNG_SIZE], uint64_t src[JL_RNG_SIZE]) JL_NOTSA JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion_future, size_t ssize) { jl_task_t *ct = jl_current_task; - jl_task_t *t = (jl_task_t*)jl_gc_alloc(ct->ptls, sizeof(jl_task_t), jl_task_type); + jl_task_t *t = (jl_task_t*)jl_gc_alloc_nonmoving(ct->ptls, sizeof(jl_task_t), jl_task_type); jl_set_typetagof(t, jl_task_tag, 0); - // Task cannot be moved, as jl_mutex_t (as globals) references tasks - OBJ_PIN(t); JL_PROBE_RT_NEW_TASK(ct, t); t->ctx.copy_stack = 0; if (ssize == 0) { @@ -1544,10 +1542,8 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi) bootstrap_task.value.ptls = ptls; if (jl_nothing == NULL) // make a placeholder jl_nothing = jl_gc_permobj(0, jl_nothing_type); - jl_task_t *ct = (jl_task_t*)jl_gc_alloc(ptls, sizeof(jl_task_t), jl_task_type); + jl_task_t *ct = (jl_task_t*)jl_gc_alloc_nonmoving(ptls, sizeof(jl_task_t), jl_task_type); jl_set_typetagof(ct, jl_task_tag, 0); - // Task cannot be moved, as jl_mutex_t (as globals) references tasks - OBJ_PIN(ct); memset(ct, 0, sizeof(jl_task_t)); void *stack = stack_lo; size_t ssize = (char*)stack_hi - (char*)stack_lo; From 2e50283772f71a5bda1a05d7043eec45643e6193 Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Thu, 6 Feb 2025 05:03:33 +0000 Subject: [PATCH 10/11] Fix multiple merge issues --- src/gc-mmtk.c | 2 +- src/ircode.c | 1 + src/staticdata.c | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/gc-mmtk.c b/src/gc-mmtk.c index 66a2c8ebf4fb0..837bcc3347aa1 
100644 --- a/src/gc-mmtk.c +++ b/src/gc-mmtk.c @@ -749,7 +749,7 @@ JL_DLLEXPORT void jl_gc_scan_vm_specific_roots(RootsWorkClosure* closure) trace_full_globally_rooted(closure, &buf, &len); // Simply pin things in global roots table - size_t i; + // size_t i; // for (i = 0; i < jl_array_len(jl_global_roots_table); i++) { // jl_value_t* root = jl_array_ptr_ref(jl_global_roots_table, i); // add_node_to_roots_buffer(closure, &buf, &len, root); diff --git a/src/ircode.c b/src/ircode.c index 5fd25e316919b..05fc3eef6fe7b 100644 --- a/src/ircode.c +++ b/src/ircode.c @@ -1664,6 +1664,7 @@ void jl_init_serializer(void) if (deser_tag[i]) { OBJHASH_PIN(deser_tag[i]) ptrhash_put(&ser_tag, deser_tag[i], (void*)i); + } } i = 2; diff --git a/src/staticdata.c b/src/staticdata.c index 64f71a0df46c9..734e001a7d27e 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -745,6 +745,7 @@ static void record_field_change(jl_value_t **addr, jl_value_t *newval) JL_NOTSAF OBJHASH_PIN((void*)addr) OBJHASH_PIN((void*)newval) ptrhash_put(&field_replace, (void*)addr, newval); + } } static jl_value_t *get_replaceable_field(jl_value_t **addr, int mutabl) JL_GC_DISABLED From 5cf8c825992fcd7822e78e9a36e8b7b1f20d573d Mon Sep 17 00:00:00 2001 From: Yi Lin Date: Fri, 7 Feb 2025 03:40:53 +0000 Subject: [PATCH 11/11] Allow setting MMTK_MOVING --- deps/mmtk_julia.mk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deps/mmtk_julia.mk b/deps/mmtk_julia.mk index 424113fd4164c..2c561b1b6a185 100644 --- a/deps/mmtk_julia.mk +++ b/deps/mmtk_julia.mk @@ -3,7 +3,7 @@ # Both MMTK_MOVING and MMTK_PLAN should be specified in the Make.user file. # At this point, since we only support non-moving this is always set to 0 # FIXME: change it to `?:` when introducing moving plans -MMTK_MOVING := 0 +MMTK_MOVING ?= 0 MMTK_VARS := MMTK_PLAN=$(MMTK_PLAN) MMTK_MOVING=$(MMTK_MOVING) ifneq ($(USE_BINARYBUILDER_MMTK_JULIA),1)