Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
100 changes: 60 additions & 40 deletions src/gc-alloc-profiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,7 @@ struct RawAlloc {
size_t size;
};

struct AllocProfile {
int skip_every;

struct PerThreadAllocProfile {
vector<RawAlloc> allocs;
unordered_map<size_t, size_t> type_address_by_value_address;
unordered_map<size_t, size_t> frees_by_type_address;
Expand All @@ -35,11 +33,22 @@ struct AllocProfile {
size_t last_recorded_alloc;
};

// Top-level allocation-profiler state: one sampling setting shared by all
// threads plus a separate recording buffer for each thread.
struct AllocProfile {
// Sampling interval. _record_allocated_value returns without recording while
// fewer than `skip_every` allocations have been counted on the calling thread
// since that thread's last recorded allocation.
int skip_every;

// One profile per thread, indexed by jl_threadid() in the record callbacks.
// jl_start_alloc_profile appends jl_n_threads entries.
vector<PerThreadAllocProfile> per_thread_profiles;
};

// Backing storage for the data handed out by jl_stop_alloc_profile: the
// RawAllocResults it returns holds the data() pointers and sizes of these two
// vectors, and jl_free_alloc_profile clears them.
struct CombinedResults {
// All per-thread `allocs` vectors appended one thread after another
// (not interleaved, so the cross-thread ordering is not preserved).
vector<RawAlloc> combined_allocs;
// One FreeInfo per entry of each thread's frees_by_type_address map; entries
// are not merged across threads, so a type address can appear more than once.
vector<FreeInfo> combined_frees;
};

// == global variables manipulated by callbacks ==

AllocProfile g_alloc_profile;
RawAllocResults *g_alloc_profile_results = nullptr;
int g_alloc_profile_enabled = false;
CombinedResults g_combined_results; // will live forever

// === stack stuff ===

Expand All @@ -58,61 +67,70 @@ RawBacktrace get_raw_backtrace() {
// == exported interface ==

JL_DLLEXPORT void jl_start_alloc_profile(int skip_every) {
g_alloc_profile_enabled = true;
g_alloc_profile = AllocProfile{skip_every};

for (int i = 0; i < jl_n_threads; i++) {
g_alloc_profile.per_thread_profiles.push_back(PerThreadAllocProfile{});
}

g_alloc_profile_enabled = true;
}

extern "C" { // Needed since the function doesn't take any arguments.

JL_DLLEXPORT struct RawAllocResults* jl_stop_alloc_profile() {
JL_DLLEXPORT struct RawAllocResults jl_stop_alloc_profile() {
g_alloc_profile_enabled = false;

auto results = new RawAllocResults{
g_alloc_profile.allocs.data(),
g_alloc_profile.allocs.size()
};
// combine allocs
// TODO: interleave to preserve ordering
for (const auto& profile : g_alloc_profile.per_thread_profiles) {
for (const auto& alloc : profile.allocs) {
g_combined_results.combined_allocs.push_back(alloc);
}
}

// package up frees
results->num_frees = g_alloc_profile.frees_by_type_address.size();
results->frees = (FreeInfo*) malloc(sizeof(FreeInfo) * results->num_frees);
int j = 0;
for (auto type_addr_free_count : g_alloc_profile.frees_by_type_address) {
results->frees[j++] = FreeInfo{
type_addr_free_count.first,
type_addr_free_count.second
};
for (const auto& profile : g_alloc_profile.per_thread_profiles) {
for (const auto& free_info : profile.frees_by_type_address) {
g_combined_results.combined_frees.push_back(FreeInfo{
free_info.first,
free_info.second
});
}
}

g_alloc_profile_results = results;

return results;
return RawAllocResults{
g_combined_results.combined_allocs.data(),
g_combined_results.combined_allocs.size(),
g_combined_results.combined_frees.data(),
g_combined_results.combined_frees.size()
};
}

JL_DLLEXPORT void jl_free_alloc_profile() {
g_alloc_profile.frees_by_type_address.clear();
g_alloc_profile.type_address_by_value_address.clear();
g_alloc_profile.alloc_counter = 0;
for (auto alloc : g_alloc_profile.allocs) {
free(alloc.backtrace.data);
for (auto profile : g_alloc_profile.per_thread_profiles) {
for (auto alloc : profile.allocs) {
free(alloc.backtrace.data);
}
}
g_alloc_profile.allocs.clear();
g_alloc_profile.per_thread_profiles.clear();

if (g_alloc_profile_results != nullptr) {
free(g_alloc_profile_results->frees);
// free the results?
g_alloc_profile_results = nullptr;
}
g_combined_results.combined_allocs.clear();
g_combined_results.combined_frees.clear();
}

}

// == callbacks called into by the outside ==

void _record_allocated_value(jl_value_t *val, size_t size) JL_NOTSAFEPOINT {
auto& profile = g_alloc_profile;
auto& global_profile = g_alloc_profile;

auto& profile = global_profile.per_thread_profiles[jl_threadid()];

profile.alloc_counter++;
auto diff = profile.alloc_counter - profile.last_recorded_alloc;
if (diff < profile.skip_every) {
if (diff < g_alloc_profile.skip_every) {
return;
}
profile.last_recorded_alloc = profile.alloc_counter;
Expand All @@ -131,17 +149,19 @@ void _record_allocated_value(jl_value_t *val, size_t size) JL_NOTSAFEPOINT {
void _record_freed_value(jl_taggedvalue_t *tagged_val) JL_NOTSAFEPOINT {
jl_value_t *val = jl_valueof(tagged_val);

auto& profile = g_alloc_profile.per_thread_profiles[jl_threadid()];

auto value_address = (size_t)val;
auto type_address = g_alloc_profile.type_address_by_value_address.find(value_address);
if (type_address == g_alloc_profile.type_address_by_value_address.end()) {
auto type_address = profile.type_address_by_value_address.find(value_address);
if (type_address == profile.type_address_by_value_address.end()) {
return; // TODO: warn
}
auto frees = g_alloc_profile.frees_by_type_address.find(type_address->second);
auto frees = profile.frees_by_type_address.find(type_address->second);

if (frees == g_alloc_profile.frees_by_type_address.end()) {
g_alloc_profile.frees_by_type_address[type_address->second] = 1;
if (frees == profile.frees_by_type_address.end()) {
profile.frees_by_type_address[type_address->second] = 1;
} else {
g_alloc_profile.frees_by_type_address[type_address->second] = frees->second + 1;
profile.frees_by_type_address[type_address->second] = frees->second + 1;
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/gc-alloc-profiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ void _report_gc_finished(
uint64_t pause, uint64_t freed, uint64_t allocd, int full, int recollect
) JL_NOTSAFEPOINT;
JL_DLLEXPORT void jl_start_alloc_profile(int skip_every);
JL_DLLEXPORT struct RawAllocResults *jl_stop_alloc_profile(void);
JL_DLLEXPORT struct RawAllocResults jl_stop_alloc_profile(void);
JL_DLLEXPORT void jl_free_alloc_profile(void);

void _record_allocated_value(jl_value_t *val, size_t size) JL_NOTSAFEPOINT;
Expand Down
32 changes: 19 additions & 13 deletions stdlib/AllocProfile/src/AllocProfile.jl
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,6 @@ struct RawAlloc
size::Csize_t
end

struct TypeNamePair
addr::Csize_t
name::Ptr{UInt8}
end

struct FreeInfo
type::Ptr{Type}
count::UInt
Expand All @@ -42,8 +37,7 @@ function start(skip_every::Int=0)
end

function stop()
raw_results_ptr = ccall(:jl_stop_alloc_profile, Ptr{RawAllocResults}, ())
raw_results = unsafe_load(raw_results_ptr)
raw_results = ccall(:jl_stop_alloc_profile, RawAllocResults, ())
decoded_results = decode(raw_results)
return decoded_results
end
Expand All @@ -66,7 +60,7 @@ end
# decoded results

struct Alloc
type::Type
type::Any
stacktrace::StackTrace
size::Int
end
Expand All @@ -85,13 +79,15 @@ const BacktraceCache = Dict{BacktraceEntry,Vector{StackFrame}}

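# Loading a type pointer below the low threshold seems to segfault, so `load_type`
# only dereferences pointers strictly between the low and high thresholds.
# TODO: find out why these pointers are invalid and whether the bounds are right.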
TYPE_PTR_THRESHOLD = 0x0000000100000000
TYPE_PTR_LOW_THRESHOLD = 0x0000000100000000
TYPE_PTR_HIGH_THRESHOLD = 100000000000000

function load_type(ptr::Ptr{Type})::Type
if UInt(ptr) < TYPE_PTR_THRESHOLD
return Missing
function load_type(ptr::Ptr{Type})
# println("type: $(UInt(ptr))")
if TYPE_PTR_LOW_THRESHOLD < UInt(ptr) < TYPE_PTR_HIGH_THRESHOLD
return unsafe_pointer_to_objref(ptr)
end
return unsafe_pointer_to_objref(ptr)
return Missing
end

function decode_alloc(cache::BacktraceCache, raw_alloc::RawAlloc)::Alloc
Expand Down Expand Up @@ -122,11 +118,21 @@ function decode(raw_results::RawAllocResults)::AllocResults
)
end

# Handle of the debug log stream. It is left unassigned here, set by `__init__`,
# and written to by `load_backtrace`.
const f = Ref{IOStream}()

# Module initializer: opens "debug.log" (a relative path) in write mode and
# stores the stream in `f`.
# NOTE(review): nothing visible in this file closes the stream explicitly, and
# this looks like temporary debugging instrumentation — confirm before merging.
function __init__()
f[] = open("debug.log", "w")
end

# Copy the `trace.size` entries stored at `trace.data` into a freshly allocated
# Julia vector of instruction pointers. The raw pointer, the size and every
# index visited are also written to the debug log stream held in `f`.
function load_backtrace(trace::RawBacktrace)::Vector{Ptr{Cvoid}}
    sink = f[]
    println(sink, "load_backtrace: trace.data: $(trace.data)")
    println(sink, "load_backtrace: trace.size: $(trace.size)")

    # Log the index first, then read the entry, exactly once per position.
    function read_frame(idx)
        println(sink, " $idx")
        return unsafe_load(trace.data, idx)
    end

    return Ptr{Cvoid}[read_frame(idx) for idx in 1:trace.size]
end

Expand Down
2 changes: 1 addition & 1 deletion stdlib/AllocProfile/test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ using AllocProfile
using Base64

results = AllocProfile.stop()
AllocProfile.clear()
# AllocProfile.clear()

@test length(results.allocs) > 0
first_alloc = results.allocs[1]
Expand Down