diff --git a/src/gc-alloc-profiler.cpp b/src/gc-alloc-profiler.cpp
--- a/src/gc-alloc-profiler.cpp
+++ b/src/gc-alloc-profiler.cpp
@@ -24,9 +24,7 @@ struct RawAlloc {
     size_t size;
 };
 
-struct AllocProfile {
-    int skip_every;
-
+struct PerThreadAllocProfile {
     vector<RawAlloc> allocs;
     unordered_map<size_t, size_t> type_address_by_value_address;
     unordered_map<size_t, size_t> frees_by_type_address;
@@ -35,11 +33,27 @@ struct AllocProfile {
     size_t last_recorded_alloc;
 };
 
+// Profiler-wide state: the sampling interval plus one profile per thread,
+// indexed by jl_threadid().
+struct AllocProfile {
+    int skip_every;
+
+    vector<PerThreadAllocProfile> per_thread_profiles;
+};
+
+// Backing storage for the arrays handed out by jl_stop_alloc_profile().
+// The contents are replaced by the next jl_stop_alloc_profile() call and
+// emptied by jl_free_alloc_profile().
+struct CombinedResults {
+    vector<RawAlloc> combined_allocs;
+    vector<FreeInfo> combined_frees;
+};
+
 // == global variables manipulated by callbacks ==
 
 AllocProfile g_alloc_profile;
-RawAllocResults *g_alloc_profile_results = nullptr;
 int g_alloc_profile_enabled = false;
+CombinedResults g_combined_results; // will live forever
 
 // === stack stuff ===
 
@@ -58,50 +72,73 @@ RawBacktrace get_raw_backtrace() {
 // == exported interface ==
 
 JL_DLLEXPORT void jl_start_alloc_profile(int skip_every) {
-    g_alloc_profile_enabled = true;
     g_alloc_profile = AllocProfile{skip_every};
+
+    for (int i = 0; i < jl_n_threads; i++) {
+        g_alloc_profile.per_thread_profiles.push_back(PerThreadAllocProfile{});
+    }
+
+    // Enable only once every thread has a profile: the record callbacks
+    // index per_thread_profiles by jl_threadid().
+    g_alloc_profile_enabled = true;
 }
 
 extern "C" {  // Needed since the function doesn't take any arguments.
 
-JL_DLLEXPORT struct RawAllocResults* jl_stop_alloc_profile() {
+JL_DLLEXPORT struct RawAllocResults jl_stop_alloc_profile() {
     g_alloc_profile_enabled = false;
 
-    auto results = new RawAllocResults{
-        g_alloc_profile.allocs.data(),
-        g_alloc_profile.allocs.size()
-    };
+    // Start from empty buffers so that calling stop again without an
+    // intervening jl_free_alloc_profile() cannot duplicate earlier entries.
+    g_combined_results.combined_allocs.clear();
+    g_combined_results.combined_frees.clear();
+
+    // combine allocs
+    // TODO: interleave to preserve ordering
+    for (const auto& profile : g_alloc_profile.per_thread_profiles) {
+        g_combined_results.combined_allocs.insert(
+            g_combined_results.combined_allocs.end(),
+            profile.allocs.begin(),
+            profile.allocs.end()
+        );
+    }
 
     // package up frees
-    results->num_frees = g_alloc_profile.frees_by_type_address.size();
-    results->frees = (FreeInfo*) malloc(sizeof(FreeInfo) * results->num_frees);
-    int j = 0;
-    for (auto type_addr_free_count : g_alloc_profile.frees_by_type_address) {
-        results->frees[j++] = FreeInfo{
-            type_addr_free_count.first,
-            type_addr_free_count.second
-        };
+    // The same type can be freed on several threads; sum the per-thread
+    // counts so that each type address is reported exactly once.
+    decltype(PerThreadAllocProfile::frees_by_type_address) total_frees_by_type;
+    for (const auto& profile : g_alloc_profile.per_thread_profiles) {
+        for (const auto& free_info : profile.frees_by_type_address) {
+            total_frees_by_type[free_info.first] += free_info.second;
+        }
+    }
+    for (const auto& free_info : total_frees_by_type) {
+        g_combined_results.combined_frees.push_back(FreeInfo{
+            free_info.first,
+            free_info.second
+        });
     }
 
-    g_alloc_profile_results = results;
-
-    return results;
+    return RawAllocResults{
+        g_combined_results.combined_allocs.data(),
+        g_combined_results.combined_allocs.size(),
+        g_combined_results.combined_frees.data(),
+        g_combined_results.combined_frees.size()
+    };
 }
 
 JL_DLLEXPORT void jl_free_alloc_profile() {
-    g_alloc_profile.frees_by_type_address.clear();
-    g_alloc_profile.type_address_by_value_address.clear();
-    g_alloc_profile.alloc_counter = 0;
-    for (auto alloc : g_alloc_profile.allocs) {
-        free(alloc.backtrace.data);
+    // Take references: iterating by value would deep-copy every profile
+    // (its alloc vector and both maps) just to read the backtrace pointers.
+    for (const auto& profile : g_alloc_profile.per_thread_profiles) {
+        for (const auto& alloc : profile.allocs) {
+            free(alloc.backtrace.data);
+        }
     }
-    g_alloc_profile.allocs.clear();
+    g_alloc_profile.per_thread_profiles.clear();
 
-    if (g_alloc_profile_results != nullptr) {
-        free(g_alloc_profile_results->frees);
-        // free the results?
-        g_alloc_profile_results = nullptr;
-    }
+    g_combined_results.combined_allocs.clear();
+    g_combined_results.combined_frees.clear();
 }
 
 }
@@ -109,10 +146,12 @@ JL_DLLEXPORT void jl_free_alloc_profile() {
 // == callbacks called into by the outside ==
 
 void _record_allocated_value(jl_value_t *val, size_t size) JL_NOTSAFEPOINT {
-    auto& profile = g_alloc_profile;
+    // Each thread counts and records into its own profile.
+    auto& profile = g_alloc_profile.per_thread_profiles[jl_threadid()];
+
     profile.alloc_counter++;
     auto diff = profile.alloc_counter - profile.last_recorded_alloc;
-    if (diff < profile.skip_every) {
+    if (diff < static_cast<size_t>(g_alloc_profile.skip_every)) {
         return;
     }
     profile.last_recorded_alloc = profile.alloc_counter;
@@ -131,17 +170,18 @@ void _record_allocated_value(jl_value_t *val, size_t size) JL_NOTSAFEPOINT {
 void _record_freed_value(jl_taggedvalue_t *tagged_val) JL_NOTSAFEPOINT {
     jl_value_t *val = jl_valueof(tagged_val);
 
+    auto& profile = g_alloc_profile.per_thread_profiles[jl_threadid()];
+
     auto value_address = (size_t)val;
-    auto type_address = g_alloc_profile.type_address_by_value_address.find(value_address);
-    if (type_address == g_alloc_profile.type_address_by_value_address.end()) {
+    // NOTE(review): only the calling thread's map is searched, so a value
+    // allocated on a different thread is silently skipped -- confirm intended.
+    auto type_address = profile.type_address_by_value_address.find(value_address);
+    if (type_address == profile.type_address_by_value_address.end()) {
         return; // TODO: warn
     }
-    auto frees = g_alloc_profile.frees_by_type_address.find(type_address->second);
 
-    if (frees == g_alloc_profile.frees_by_type_address.end()) {
-        g_alloc_profile.frees_by_type_address[type_address->second] = 1;
-    } else {
-        g_alloc_profile.frees_by_type_address[type_address->second] = frees->second + 1;
-    }
+    // operator[] value-initializes a missing count to 0, so one increment
+    // covers both the first and every later free of this type.
+    profile.frees_by_type_address[type_address->second]++;
 }
 
diff --git a/src/gc-alloc-profiler.h b/src/gc-alloc-profiler.h
index 52d932226596b..dab52c1ecd753 100644
--- a/src/gc-alloc-profiler.h
+++ b/src/gc-alloc-profiler.h
@@ -34,7 +34,7 @@ void _report_gc_finished(
     uint64_t pause, uint64_t freed, uint64_t allocd, int full, int recollect
 ) JL_NOTSAFEPOINT;
 JL_DLLEXPORT void jl_start_alloc_profile(int skip_every);
-JL_DLLEXPORT struct RawAllocResults *jl_stop_alloc_profile(void);
+JL_DLLEXPORT struct RawAllocResults jl_stop_alloc_profile(void);
 JL_DLLEXPORT void jl_free_alloc_profile(void);
 
 void _record_allocated_value(jl_value_t *val, size_t size) JL_NOTSAFEPOINT;
diff --git a/stdlib/AllocProfile/src/AllocProfile.jl b/stdlib/AllocProfile/src/AllocProfile.jl
--- a/stdlib/AllocProfile/src/AllocProfile.jl
+++ b/stdlib/AllocProfile/src/AllocProfile.jl
@@ -18,11 +18,6 @@ struct RawAlloc
     size::Csize_t
 end
 
-struct TypeNamePair
-    addr::Csize_t
-    name::Ptr{UInt8}
-end
-
 struct FreeInfo
     type::Ptr{Type}
     count::UInt
@@ -42,8 +37,7 @@ function start(skip_every::Int=0)
 end
 
 function stop()
-    raw_results_ptr = ccall(:jl_stop_alloc_profile, Ptr{RawAllocResults}, ())
-    raw_results = unsafe_load(raw_results_ptr)
+    raw_results = ccall(:jl_stop_alloc_profile, RawAllocResults, ())
     decoded_results = decode(raw_results)
     return decoded_results
 end
@@ -66,7 +60,7 @@ end
 # decoded results
 
 struct Alloc
-    type::Type
+    type::Any
     stacktrace::StackTrace
     size::Int
 end
@@ -85,13 +79,17 @@ const BacktraceCache = Dict{BacktraceEntry,Vector{StackFrame}}
 
 # loading anything below this seems to segfault
 # TODO: find out what's going on
-TYPE_PTR_THRESHOLD = 0x0000000100000000
+const TYPE_PTR_LOW_THRESHOLD = 0x0000000100000000
+# NOTE(review): decimal, unlike the hex lower bound -- confirm the intended value
+const TYPE_PTR_HIGH_THRESHOLD = 100000000000000
 
-function load_type(ptr::Ptr{Type})::Type
-    if UInt(ptr) < TYPE_PTR_THRESHOLD
-        return Missing
+# Returns the object at `ptr` when the address lies strictly between the two
+# thresholds, and `Missing` otherwise.
+function load_type(ptr::Ptr{Type})
+    if TYPE_PTR_LOW_THRESHOLD < UInt(ptr) < TYPE_PTR_HIGH_THRESHOLD
+        return unsafe_pointer_to_objref(ptr)
     end
-    return unsafe_pointer_to_objref(ptr)
+    return Missing
 end
 
 function decode_alloc(cache::BacktraceCache, raw_alloc::RawAlloc)::Alloc