Skip to content

Commit f1b0405

Browse files
vtjnashfingolfin
authored andcommitted
fix jl_active_task_stack to return correct values
Seems potentially slightly costly to need to generate these extra writes for every safepoint transition to keep track of the current stack location, but that is potentially an unavoidable cost to doing conservative stack scanning for gc.
1 parent c4725ae commit f1b0405

File tree

12 files changed

+106
-117
lines changed

12 files changed

+106
-117
lines changed

src/gc-stock.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3414,6 +3414,7 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
34143414
}
34153415
jl_gc_debug_print();
34163416

3417+
ct->ctx.activefp = (char*)jl_get_frame_addr();
34173418
int8_t old_state = jl_atomic_load_relaxed(&ptls->gc_state);
34183419
jl_atomic_store_release(&ptls->gc_state, JL_GC_STATE_WAITING);
34193420
// `jl_safepoint_start_gc()` makes sure only one thread can run the GC.

src/jl_exported_funcs.inc

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -449,7 +449,6 @@
449449
XX(jl_tagged_gensym) \
450450
XX(jl_take_buffer) \
451451
XX(jl_task_get_next) \
452-
XX(jl_task_stack_buffer) \
453452
XX(jl_termios_size) \
454453
XX(jl_test_cpu_feature) \
455454
XX(jl_threadid) \

src/julia_gcext.h

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -135,19 +135,13 @@ JL_DLLEXPORT int jl_gc_conservative_gc_support_enabled(void);
135135
// NOTE: Only valid to call from within a GC context.
136136
JL_DLLEXPORT jl_value_t *jl_gc_internal_obj_base_ptr(void *p) JL_NOTSAFEPOINT;
137137

138-
// Return a non-null pointer to the start of the stack area if the task
139-
// has an associated stack buffer. In that case, *size will also contain
140-
// the size of that stack buffer upon return. Also, if task is a thread's
141-
// current task, that thread's id will be stored in *tid; otherwise,
142-
// *tid will be set to -1.
143-
//
144-
// DEPRECATED: use jl_active_task_stack() instead.
145-
JL_DLLEXPORT void *jl_task_stack_buffer(jl_task_t *task, size_t *size, int *tid);
146-
147138
// Query the active and total stack range for the given task, and set
148139
// *active_start and *active_end respectively *total_start and *total_end
149140
// accordingly. The range for the active part is a best-effort approximation
150141
// and may not be tight.
142+
//
143+
// NOTE: Only valid to call from within a GC context. May return incorrect
144+
// values and segfault otherwise.
151145
JL_DLLEXPORT void jl_active_task_stack(jl_task_t *task,
152146
char **active_start, char **active_end,
153147
char **total_start, char **total_end) JL_NOTSAFEPOINT;

src/julia_internal.h

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1765,18 +1765,6 @@ JL_DLLEXPORT unsigned jl_special_vector_alignment(size_t nfields, jl_value_t *fi
17651765
void register_eh_frames(uint8_t *Addr, size_t Size) JL_NOTSAFEPOINT;
17661766
void deregister_eh_frames(uint8_t *Addr, size_t Size) JL_NOTSAFEPOINT;
17671767

1768-
STATIC_INLINE void *jl_get_frame_addr(void) JL_NOTSAFEPOINT
1769-
{
1770-
#ifdef __GNUC__
1771-
return __builtin_frame_address(0);
1772-
#else
1773-
void *dummy = NULL;
1774-
// The mask is to suppress the compiler warning about returning
1775-
// address of local variable
1776-
return (void*)((uintptr_t)&dummy & ~(uintptr_t)15);
1777-
#endif
1778-
}
1779-
17801768
// Log `msg` to the current logger by calling CoreLogging.logmsg_shim() on the
17811769
// julia side. If any of module, group, id, file or line are NULL, these will
17821770
// be passed to the julia side as `nothing`. If `kwargs` is NULL an empty set

src/julia_threads.h

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@ typedef struct {
9696
_jl_ucontext_t *ctx;
9797
jl_stack_context_t *copy_ctx;
9898
};
99+
void *activefp;
99100
void *stkbuf; // malloc'd memory (either copybuf or stack)
100101
size_t bufsz; // actual sizeof stkbuf
101102
unsigned int copy_stack:31; // sizeof stack for copybuf
@@ -316,6 +317,18 @@ JL_DLLEXPORT void (jl_cpu_pause)(void);
316317
JL_DLLEXPORT void (jl_cpu_suspend)(void);
317318
JL_DLLEXPORT void (jl_cpu_wake)(void);
318319

320+
STATIC_INLINE void *jl_get_frame_addr(void) JL_NOTSAFEPOINT
321+
{
322+
#ifdef __GNUC__
323+
return __builtin_frame_address(0);
324+
#else
325+
void *dummy = NULL;
326+
// The mask is to suppress the compiler warning about returning
327+
// address of local variable
328+
return (void*)((uintptr_t)&dummy & ~(uintptr_t)15);
329+
#endif
330+
}
331+
319332
#ifdef __clang_gcanalyzer__
320333
// Note that the sigint safepoint can also trigger GC, albeit less likely
321334
void jl_gc_safepoint_(jl_ptls_t tls);
@@ -343,6 +356,9 @@ STATIC_INLINE int8_t jl_gc_state_set(jl_ptls_t ptls, int8_t state,
343356
{
344357
assert(old_state != JL_GC_PARALLEL_COLLECTOR_THREAD);
345358
assert(old_state != JL_GC_CONCURRENT_COLLECTOR_THREAD);
359+
if (__builtin_constant_p(state) ? state : // required if !old_state && state, otherwise optional
360+
__builtin_constant_p(old_state) ? !old_state : 1)
361+
jl_atomic_load_relaxed(&ptls->current_task)->ctx.activefp = (char*)jl_get_frame_addr();
346362
jl_atomic_store_release(&ptls->gc_state, state);
347363
if (state == JL_GC_STATE_UNSAFE || old_state == JL_GC_STATE_UNSAFE)
348364
jl_gc_safepoint_(ptls);

src/llvm-codegen-shared.h

Lines changed: 18 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -237,16 +237,24 @@ static inline void emit_gc_safepoint(llvm::IRBuilder<> &builder, llvm::Type *T_s
237237
emit_signal_fence(builder);
238238
}
239239

240-
static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::Value *state, llvm::Value *old_state, bool final)
240+
static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *task, llvm::MDNode *tbaa, llvm::Value *state, llvm::Value *old_state, bool final)
241241
{
242242
using namespace llvm;
243+
Value *ptls = get_current_ptls_from_task(builder, task, tbaa);
243244
Type *T_int8 = state->getType();
244245
unsigned offset = offsetof(jl_tls_states_t, gc_state);
245246
Value *gc_state = builder.CreateConstInBoundsGEP1_32(T_int8, ptls, offset, "gc_state");
246247
if (old_state == nullptr) {
247248
old_state = builder.CreateLoad(T_int8, gc_state);
248249
cast<LoadInst>(old_state)->setOrdering(AtomicOrdering::Monotonic);
249250
}
251+
if (isa<Constant>(state) ? state == builder.getInt8(0) :
252+
isa<Constant>(old_state) ? old_state == builder.getInt8(0) : true) {
253+
unsigned offset = offsetof(jl_task_t, ctx.activefp);
254+
Value *currentfp = builder.CreateConstInBoundsGEP1_32(T_int8, task, offset, "gc_state");
255+
Value *fp = builder.CreateIntrinsic(Intrinsic::frameaddress, {builder.getPtrTy()}, {builder.getInt32(0)});
256+
builder.CreateAlignedStore(fp, currentfp, Align(sizeof(void*)));
257+
}
250258
builder.CreateAlignedStore(state, gc_state, Align(sizeof(void*)))->setOrdering(AtomicOrdering::Release);
251259
if (auto *C = dyn_cast<ConstantInt>(old_state))
252260
if (C->isZero())
@@ -261,39 +269,38 @@ static inline llvm::Value *emit_gc_state_set(llvm::IRBuilder<> &builder, llvm::T
261269
builder.CreateICmpEQ(state, zero8)),
262270
passBB, exitBB);
263271
builder.SetInsertPoint(passBB);
264-
MDNode *tbaa = get_tbaa_const(builder.getContext());
265-
emit_gc_safepoint(builder, T_size, ptls, tbaa, final);
272+
emit_gc_safepoint(builder, T_size, ptls, get_tbaa_const(builder.getContext()), final);
266273
builder.CreateBr(exitBB);
267274
builder.SetInsertPoint(exitBB);
268275
return old_state;
269276
}
270277

271-
static inline llvm::Value *emit_gc_unsafe_enter(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, bool final)
278+
static inline llvm::Value *emit_gc_unsafe_enter(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *task, llvm::MDNode *tbaa, bool final)
272279
{
273280
using namespace llvm;
274281
Value *state = builder.getInt8(0);
275-
return emit_gc_state_set(builder, T_size, ptls, state, nullptr, final);
282+
return emit_gc_state_set(builder, T_size, task, tbaa, state, nullptr, final);
276283
}
277284

278-
static inline llvm::Value *emit_gc_unsafe_leave(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::Value *state, bool final)
285+
static inline llvm::Value *emit_gc_unsafe_leave(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *task, llvm::MDNode *tbaa, llvm::Value *state, bool final)
279286
{
280287
using namespace llvm;
281288
Value *old_state = builder.getInt8(JL_GC_STATE_UNSAFE);
282-
return emit_gc_state_set(builder, T_size, ptls, state, old_state, final);
289+
return emit_gc_state_set(builder, T_size, task, tbaa, state, old_state, final);
283290
}
284291

285-
static inline llvm::Value *emit_gc_safe_enter(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, bool final)
292+
static inline llvm::Value *emit_gc_safe_enter(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *task, llvm::MDNode *tbaa, bool final)
286293
{
287294
using namespace llvm;
288295
Value *state = builder.getInt8(JL_GC_STATE_SAFE);
289-
return emit_gc_state_set(builder, T_size, ptls, state, nullptr, final);
296+
return emit_gc_state_set(builder, T_size, task, tbaa, state, nullptr, final);
290297
}
291298

292-
static inline llvm::Value *emit_gc_safe_leave(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *ptls, llvm::Value *state, bool final)
299+
static inline llvm::Value *emit_gc_safe_leave(llvm::IRBuilder<> &builder, llvm::Type *T_size, llvm::Value *task, llvm::MDNode *tbaa, llvm::Value *state, bool final)
293300
{
294301
using namespace llvm;
295302
Value *old_state = builder.getInt8(JL_GC_STATE_SAFE);
296-
return emit_gc_state_set(builder, T_size, ptls, state, old_state, final);
303+
return emit_gc_state_set(builder, T_size, task, tbaa, state, old_state, final);
297304
}
298305

299306
// Compatibility shims for LLVM attribute APIs that were renamed in LLVM 14.

src/llvm-ptls.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -185,7 +185,7 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter,
185185
builder.SetInsertPoint(fastTerm->getParent());
186186
fastTerm->removeFromParent();
187187
MDNode *tbaa = tbaa_gcframe;
188-
Value *prior = emit_gc_unsafe_enter(builder, T_size, get_current_ptls_from_task(builder, get_current_task_from_pgcstack(builder, pgcstack), tbaa), true);
188+
Value *prior = emit_gc_unsafe_enter(builder, T_size, get_current_task_from_pgcstack(builder, pgcstack), tbaa, true);
189189
builder.Insert(fastTerm);
190190
phi->addIncoming(pgcstack, fastTerm->getParent());
191191
// emit pre-return cleanup
@@ -197,7 +197,7 @@ void LowerPTLS::fix_pgcstack_use(CallInst *pgcstack, Function *pgcstack_getter,
197197
for (auto &BB : *pgcstack->getParent()->getParent()) {
198198
if (isa<ReturnInst>(BB.getTerminator())) {
199199
builder.SetInsertPoint(BB.getTerminator());
200-
emit_gc_unsafe_leave(builder, T_size, get_current_ptls_from_task(builder, get_current_task_from_pgcstack(builder, phi), tbaa), last_gc_state, true);
200+
emit_gc_unsafe_leave(builder, T_size, get_current_task_from_pgcstack(builder, phi), tbaa, last_gc_state, true);
201201
}
202202
}
203203
}

src/safepoint.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,7 @@ void jl_set_gc_and_wait(jl_task_t *ct) // n.b. not used on _OS_DARWIN_
240240
// reading own gc state doesn't need atomic ops since no one else
241241
// should store to it.
242242
int8_t state = jl_atomic_load_relaxed(&ct->ptls->gc_state);
243+
ct->ctx.activefp = (char*)jl_get_frame_addr();
243244
jl_atomic_store_release(&ct->ptls->gc_state, JL_GC_STATE_WAITING);
244245
uv_mutex_lock(&safepoint_lock);
245246
uv_cond_broadcast(&safepoint_cond_begin);
@@ -279,6 +280,7 @@ void jl_safepoint_wait_thread_resume(jl_task_t *ct)
279280
// might have already observed our gc_state.
280281
// if (!jl_atomic_load_relaxed(&ct->ptls->suspend_count)) return;
281282
int8_t state = jl_atomic_load_relaxed(&ct->ptls->gc_state);
283+
ct->ctx.activefp = (char*)jl_get_frame_addr();
282284
jl_atomic_store_release(&ct->ptls->gc_state, JL_GC_STATE_WAITING);
283285
uv_mutex_lock(&ct->ptls->sleep_lock);
284286
if (jl_atomic_load_relaxed(&ct->ptls->suspend_count)) {

src/signals-mach.c

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,17 @@ void jl_mach_gc_end(void)
108108
// implement jl_set_gc_and_wait from a different thread
109109
static void jl_mach_gc_wait(jl_ptls_t ptls2, mach_port_t thread, int16_t tid)
110110
{
111+
unsigned int count = MACH_THREAD_STATE_COUNT;
112+
host_thread_state_t state;
113+
kern_return_t ret = thread_get_state(thread, MACH_THREAD_STATE, (thread_state_t)&state, &count);
114+
ptls2->current_task->ctx.activefp =
115+
#ifdef _CPU_X86_64_
116+
state->__rsp;
117+
#elif defined(_CPU_AARCH64_)
118+
state->__sp;
119+
#else
120+
#error fp not defined for platform
121+
#endif
111122
// relaxed, since we don't mind missing one--we will hit another soon (immediately probably)
112123
uv_mutex_lock(&safepoint_lock);
113124
// Since this gets set to zero only while the safepoint_lock was held this

src/task.c

Lines changed: 25 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -352,81 +352,39 @@ void JL_NORETURN jl_finish_task(jl_task_t *ct)
352352
abort();
353353
}
354354

355-
JL_DLLEXPORT void *jl_task_stack_buffer(jl_task_t *task, size_t *size, int *ptid)
356-
{
357-
size_t off = 0;
358-
#ifndef _OS_WINDOWS_
359-
jl_ptls_t ptls0 = jl_atomic_load_relaxed(&jl_all_tls_states)[0];
360-
if (ptls0->root_task == task) {
361-
// See jl_init_root_task(). The root task of the main thread
362-
// has its buffer enlarged by an artificial 3000000 bytes, but
363-
// that means that the start of the buffer usually points to
364-
// inaccessible memory. We need to correct for this.
365-
off = ROOT_TASK_STACK_ADJUSTMENT;
366-
}
367-
#endif
368-
jl_ptls_t ptls2 = task->ptls;
369-
*ptid = -1;
370-
if (ptls2) {
371-
*ptid = jl_atomic_load_relaxed(&task->tid);
372-
#ifdef COPY_STACKS
373-
if (task->ctx.copy_stack) {
374-
*size = ptls2->stacksize;
375-
return (char *)ptls2->stackbase - *size;
376-
}
377-
#endif
378-
}
379-
*size = task->ctx.bufsz - off;
380-
return (void *)((char *)task->ctx.stkbuf + off);
381-
}
382-
383355
JL_DLLEXPORT void jl_active_task_stack(jl_task_t *task,
384356
char **active_start, char **active_end,
385357
char **total_start, char **total_end)
386358
{
387-
if (!task->ctx.started) {
388-
*total_start = *active_start = 0;
389-
*total_end = *active_end = 0;
359+
*total_start = *active_start = 0;
360+
*total_end = *active_end = 0;
361+
if (!task->ctx.started)
390362
return;
391-
}
392-
393-
jl_ptls_t ptls2 = task->ptls;
394-
if (task->ctx.copy_stack && ptls2) {
395-
*total_start = *active_start = (char*)ptls2->stackbase - ptls2->stacksize;
396-
*total_end = *active_end = (char*)ptls2->stackbase;
397-
}
398-
else if (task->ctx.stkbuf) {
399-
*total_start = *active_start = (char*)task->ctx.stkbuf;
400-
#ifndef _OS_WINDOWS_
401-
jl_ptls_t ptls0 = jl_atomic_load_relaxed(&jl_all_tls_states)[0];
402-
if (ptls0->root_task == task) {
403-
// See jl_init_root_task(). The root task of the main thread
404-
// has its buffer enlarged by an artificial 3000000 bytes, but
405-
// that means that the start of the buffer usually points to
406-
// inaccessible memory. We need to correct for this.
407-
*active_start += ROOT_TASK_STACK_ADJUSTMENT;
408-
*total_start += ROOT_TASK_STACK_ADJUSTMENT;
409-
}
410-
#endif
411363

412-
*total_end = *active_end = (char*)task->ctx.stkbuf + task->ctx.bufsz;
413-
#ifdef COPY_STACKS
414-
// save_stack stores the stack of an inactive task in stkbuf, and the
415-
// actual number of used bytes in copy_stack.
416-
if (task->ctx.copy_stack > 1)
364+
if (task->ctx.copy_stack) {
365+
if (task->ctx.ctx) {
366+
// currently paused task
367+
// save_stack stores the stack of an inactive task in stkbuf, and the
368+
// actual number of used bytes in copy_stack.
369+
*active_start = *total_start = (char*)task->ctx.stkbuf;
417370
*active_end = (char*)task->ctx.stkbuf + task->ctx.copy_stack;
418-
#endif
371+
*total_end = (char*)task->ctx.stkbuf + task->ctx.bufsz;
372+
}
373+
else if (task->ptls) {
374+
jl_ptls_t ptls2 = task->ptls;
375+
// currently running copy task
376+
*active_start = (char*)task->ctx.activefp;
377+
*total_start = (char*)ptls2->stackbase - ptls2->stacksize;
378+
*total_end = *active_end = (char*)ptls2->stackbase;
379+
}
419380
}
420381
else {
421-
// no stack allocated yet
422-
*total_start = *active_start = 0;
423-
*total_end = *active_end = 0;
424-
return;
425-
}
426-
427-
if (task == jl_current_task) {
428-
// scan up to current `sp` for current thread and task
429-
*active_start = (char*)jl_get_frame_addr();
382+
if (task->ctx.stkbuf) {
383+
// currently running task
384+
*active_start = (char*)task->ctx.activefp;
385+
*total_start = (char*)task->ctx.stkbuf;
386+
*active_end = *total_end = (char*)task->ctx.stkbuf + task->ctx.bufsz;
387+
}
430388
}
431389
}
432390

@@ -534,6 +492,7 @@ JL_NO_ASAN static void ctx_switch(jl_task_t *lastt)
534492
}
535493

536494
// set up global state for new task and clear global state for old task
495+
lastt->ctx.activefp = (char*)jl_get_frame_addr();
537496
t->ptls = ptls;
538497
jl_atomic_store_relaxed(&ptls->current_task, t);
539498
JL_GC_PROMISE_ROOTED(t);

0 commit comments

Comments
 (0)