From 205964153670b3b4546973eec4554ff1dc7e3b98 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Sat, 17 May 2025 16:54:58 +0000 Subject: [PATCH 1/2] avoid deadlock if crashing inside profile_wr_lock The rd/wr lock distinction here was supposed to help prevent deadlocks by allowing recursion (even over signals), but did not account for crashes causing recursion while holding the wr lock. Make these lock acquires fail-able if they would cause deadlock. --- src/debuginfo.cpp | 18 +++++++++-------- src/julia_internal.h | 5 ++--- src/signal-handling.c | 45 ++++++++++++++++++++++++++++++++++++------- src/threading.c | 8 +++++--- 4 files changed, 55 insertions(+), 21 deletions(-) diff --git a/src/debuginfo.cpp b/src/debuginfo.cpp index 53c07c978f72a..e114dccfe5772 100644 --- a/src/debuginfo.cpp +++ b/src/debuginfo.cpp @@ -145,8 +145,8 @@ struct unw_table_entry template static void jl_profile_atomic(T f) JL_NOTSAFEPOINT { - assert(0 == jl_lock_profile_rd_held()); - jl_lock_profile_wr(); + int havelock = jl_lock_profile_wr(); + assert(havelock); #ifndef _OS_WINDOWS_ sigset_t sset; sigset_t oset; @@ -157,7 +157,8 @@ static void jl_profile_atomic(T f) JL_NOTSAFEPOINT #ifndef _OS_WINDOWS_ pthread_sigmask(SIG_SETMASK, &oset, NULL); #endif - jl_unlock_profile_wr(); + if (havelock) + jl_unlock_profile_wr(); } @@ -464,8 +465,8 @@ static int lookup_pointer( // DWARFContext/DWARFUnit update some internal tables during these queries, so // a lock is needed. - assert(0 == jl_lock_profile_rd_held()); - jl_lock_profile_wr(); + if (!jl_lock_profile_wr()) + return lookup_pointer(object::SectionRef(), NULL, frames, pointer, slide, demangle, noInline); auto inlineInfo = context->getInliningInfoForAddress(makeAddress(Section, pointer + slide), infoSpec); jl_unlock_profile_wr(); @@ -490,7 +491,8 @@ static int lookup_pointer( info = inlineInfo.getFrame(i); } else { - jl_lock_profile_wr(); + int havelock = jl_lock_profile_wr(); + assert(havelock); (void)havelock; info = context->getLineInfoForAddress(makeAddress(Section, pointer + slide), infoSpec); jl_unlock_profile_wr(); } @@ -1198,8 +1200,8 @@ int jl_DI_for_fptr(uint64_t fptr, uint64_t *symsize, int64_t *slide, object::SectionRef *Section, llvm::DIContext **context) JL_NOTSAFEPOINT { int found = 0; - assert(0 == jl_lock_profile_rd_held()); - jl_lock_profile_wr(); + if (!jl_lock_profile_wr()) + return 0; if (symsize) *symsize = 0; diff --git a/src/julia_internal.h b/src/julia_internal.h index 938e87935578c..d4167d3e8a5b9 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -210,10 +210,9 @@ JL_DLLEXPORT double jl_get_profile_peek_duration(void); JL_DLLEXPORT void jl_set_profile_peek_duration(double); JL_DLLEXPORT void jl_init_profile_lock(void); -JL_DLLEXPORT uintptr_t jl_lock_profile_rd_held(void) JL_NOTSAFEPOINT; -JL_DLLEXPORT void jl_lock_profile(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER; +JL_DLLEXPORT int jl_lock_profile(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER; JL_DLLEXPORT void jl_unlock_profile(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE; -JL_DLLEXPORT void jl_lock_profile_wr(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER; +JL_DLLEXPORT int jl_lock_profile_wr(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_ENTER; JL_DLLEXPORT void jl_unlock_profile_wr(void) JL_NOTSAFEPOINT JL_NOTSAFEPOINT_LEAVE; void jl_with_stackwalk_lock(void (*f)(void*) JL_NOTSAFEPOINT, void *ctx) JL_NOTSAFEPOINT; diff --git a/src/signal-handling.c b/src/signal-handling.c index ff073cc82a0a5..5056ebce8b300 100644 --- a/src/signal-handling.c +++ b/src/signal-handling.c @@ -102,7 +102,7 @@ void jl_init_profile_lock(void) #endif } -uintptr_t jl_lock_profile_rd_held(void) +static uintptr_t jl_lock_profile_rd_held(void) { #ifndef _OS_WINDOWS_ return (uintptr_t)pthread_getspecific(debuginfo_asyncsafe_held); @@ -111,38 +111,69 @@ uintptr_t jl_lock_profile_rd_held(void) #endif } -void jl_lock_profile(void) +int jl_lock_profile(void) { uintptr_t held = jl_lock_profile_rd_held(); - if (held++ == 0) + if (held == -1) + return 0; + if (held == 0) { + held = -1; +#ifndef _OS_WINDOWS_ + pthread_setspecific(debuginfo_asyncsafe_held, (void*)held); +#else + TlsSetValue(debuginfo_asyncsafe_held, (void*)held); +#endif uv_rwlock_rdlock(&debuginfo_asyncsafe); + held = 0; + } + held++; #ifndef _OS_WINDOWS_ pthread_setspecific(debuginfo_asyncsafe_held, (void*)held); #else TlsSetValue(debuginfo_asyncsafe_held, (void*)held); #endif + return 1; } JL_DLLEXPORT void jl_unlock_profile(void) { uintptr_t held = jl_lock_profile_rd_held(); - assert(held); - if (--held == 0) - uv_rwlock_rdunlock(&debuginfo_asyncsafe); + assert(held && held != -1); + held--; #ifndef _OS_WINDOWS_ pthread_setspecific(debuginfo_asyncsafe_held, (void*)held); #else TlsSetValue(debuginfo_asyncsafe_held, (void*)held); #endif + if (held == 0) + uv_rwlock_rdunlock(&debuginfo_asyncsafe); } -void jl_lock_profile_wr(void) +int jl_lock_profile_wr(void) { + uintptr_t held = jl_lock_profile_rd_held(); + if (held) + return 0; + held = -1; +#ifndef _OS_WINDOWS_ + pthread_setspecific(debuginfo_asyncsafe_held, (void*)held); +#else + TlsSetValue(debuginfo_asyncsafe_held, (void*)held); +#endif uv_rwlock_wrlock(&debuginfo_asyncsafe); + return 1; } void jl_unlock_profile_wr(void) { + uintptr_t held = jl_lock_profile_rd_held(); + assert(held == -1); + held = 0; +#ifndef _OS_WINDOWS_ + pthread_setspecific(debuginfo_asyncsafe_held, (void*)held); +#else + TlsSetValue(debuginfo_asyncsafe_held, (void*)held); +#endif uv_rwlock_wrunlock(&debuginfo_asyncsafe); } diff --git a/src/threading.c b/src/threading.c index 4256115214fc2..97b66d2e2068f 100644 --- a/src/threading.c +++ b/src/threading.c @@ -559,18 +559,20 @@ static void jl_delete_thread(void *value) JL_NOTSAFEPOINT_ENTER // this here by blocking. This also synchronizes our read of `current_task` // (which is the flag we currently use to check the liveness state of a thread). #ifdef _OS_WINDOWS_ - jl_lock_profile_wr(); + int havelock = jl_lock_profile_wr(); + assert(havelock); (void)havelock; #elif defined(JL_DISABLE_LIBUNWIND) // nothing #elif defined(__APPLE__) - jl_lock_profile_wr(); + int havelock = jl_lock_profile_wr(); + assert(havelock); (void)havelock; #else pthread_mutex_lock(&in_signal_lock); #endif jl_atomic_store_relaxed(&ptls->current_task, NULL); // indicate dead // finally, release all of the locks we had grabbed #ifdef _OS_WINDOWS_ - jl_unlock_profile_wr(); + if (havelock) jl_unlock_profile_wr(); #elif defined(JL_DISABLE_LIBUNWIND) // nothing #elif defined(__APPLE__) From 23813bc0892902984d3ada477a142062f157fdbe Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Mon, 19 May 2025 12:58:07 -0400 Subject: [PATCH 2/2] Update signal-handling.c --- src/signal-handling.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/signal-handling.c b/src/signal-handling.c index 5056ebce8b300..6e19028ca7940 100644 --- a/src/signal-handling.c +++ b/src/signal-handling.c @@ -102,7 +102,7 @@ void jl_init_profile_lock(void) #endif } -static uintptr_t jl_lock_profile_rd_held(void) +static uintptr_t jl_lock_profile_rd_held(void) JL_NOTSAFEPOINT { #ifndef _OS_WINDOWS_ return (uintptr_t)pthread_getspecific(debuginfo_asyncsafe_held);