diff --git a/base/gcutils.jl b/base/gcutils.jl index d17301a1be9b0..5ad3795cd91bc 100644 --- a/base/gcutils.jl +++ b/base/gcutils.jl @@ -105,6 +105,23 @@ Control whether garbage collection is enabled using a boolean argument (`true` f """ enable(on::Bool) = ccall(:jl_gc_enable, Int32, (Int32,), on) != 0 +""" + GC.take_heap_snapshot(io::IOStream) + GC.take_heap_snapshot(filepath::String) + +Write a snapshot of the heap, in the JSON format expected by the Chrome +Devtools Heap Snapshot viewer (.heapsnapshot extension), to the given +IO stream. +""" +function take_heap_snapshot(io) + ccall(:jl_gc_take_heap_snapshot, Cvoid, (Ptr{Cvoid},), (io::IOStream).handle::Ptr{Cvoid}) +end +function take_heap_snapshot(filepath::String) + open(filepath, "w") do io + take_heap_snapshot(io) + end +end + """ GC.enable_finalizers(on::Bool) diff --git a/src/Makefile b/src/Makefile index b7235597fd08c..d08f05c5f101f 100644 --- a/src/Makefile +++ b/src/Makefile @@ -45,8 +45,9 @@ RUNTIME_SRCS := \ jltypes gf typemap smallintset ast builtins module interpreter symbol \ dlload sys init task array dump staticdata toplevel jl_uv datatype \ simplevector runtime_intrinsics precompile \ - threading partr stackwalk gc gc-debug gc-pages gc-stacks gc-alloc-profiler method \ - jlapi signal-handling safepoint timing subtype \ + threading partr stackwalk \ + gc gc-debug gc-heap-snapshot gc-alloc-profiler gc-pages gc-stacks \ + method jlapi signal-handling safepoint timing subtype \ crc32c APInt-C processor ircode opaque_closure codegen-stubs coverage SRCS := jloptions runtime_ccall rtutils ifeq ($(OS),WINNT) @@ -289,7 +290,7 @@ $(BUILDDIR)/disasm.o $(BUILDDIR)/disasm.dbg.obj: $(SRCDIR)/debuginfo.h $(SRCDIR) $(BUILDDIR)/dump.o $(BUILDDIR)/dump.dbg.obj: $(addprefix $(SRCDIR)/,common_symbols1.inc common_symbols2.inc builtin_proto.h serialize.h) $(BUILDDIR)/gc-debug.o $(BUILDDIR)/gc-debug.dbg.obj: $(SRCDIR)/gc.h $(BUILDDIR)/gc-pages.o $(BUILDDIR)/gc-pages.dbg.obj: $(SRCDIR)/gc.h -$(BUILDDIR)/gc.o 
$(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-alloc-profiler.h +$(BUILDDIR)/gc.o $(BUILDDIR)/gc.dbg.obj: $(SRCDIR)/gc.h $(SRCDIR)/gc-heap-snapshot.h $(SRCDIR)/gc-alloc-profiler.h $(BUILDDIR)/init.o $(BUILDDIR)/init.dbg.obj: $(SRCDIR)/builtin_proto.h $(BUILDDIR)/interpreter.o $(BUILDDIR)/interpreter.dbg.obj: $(SRCDIR)/builtin_proto.h $(BUILDDIR)/jitlayers.o $(BUILDDIR)/jitlayers.dbg.obj: $(SRCDIR)/jitlayers.h $(SRCDIR)/codegen_shared.h diff --git a/src/gc-debug.c b/src/gc-debug.c index 8403a9f9f2e1b..5ec3889ea9b31 100644 --- a/src/gc-debug.c +++ b/src/gc-debug.c @@ -1205,10 +1205,10 @@ void gc_count_pool(void) jl_safe_printf("************************\n"); } -int gc_slot_to_fieldidx(void *obj, void *slot) +int gc_slot_to_fieldidx(void *obj, void *slot) JL_NOTSAFEPOINT { jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); - int nf = (int)jl_datatype_nfields(vt); + int nf = (int)jl_datatype_nfields(vt); // what happens if you're inlined? lol for (int i = 0; i < nf; i++) { void *fieldaddr = (char*)obj + jl_field_offset(vt, i); if (fieldaddr >= slot) { @@ -1218,7 +1218,7 @@ int gc_slot_to_fieldidx(void *obj, void *slot) return -1; } -int gc_slot_to_arrayidx(void *obj, void *_slot) +int gc_slot_to_arrayidx(void *obj, void *_slot) JL_NOTSAFEPOINT { char *slot = (char*)_slot; jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj); diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp new file mode 100644 index 0000000000000..e436e6b12605b --- /dev/null +++ b/src/gc-heap-snapshot.cpp @@ -0,0 +1,542 @@ +// This file is a part of Julia. 
License is MIT: https://julialang.org/license + +#include "gc-heap-snapshot.h" + +#include "julia_internal.h" +#include "gc.h" + +#include +#include +#include +#include +#include +#include +#include + +using std::vector; +using std::string; +using std::ostringstream; +using std::pair; +using std::unordered_map; +using std::unordered_set; + +int gc_heap_snapshot_enabled = 0; + +// https://stackoverflow.com/a/33799784/751061 +void print_str_escape_json(ios_t *stream, const std::string &s) { + ios_printf(stream, "\""); + for (auto c = s.cbegin(); c != s.cend(); c++) { + switch (*c) { + case '"': ios_printf(stream, "\\\""); break; + case '\\': ios_printf(stream, "\\\\"); break; + case '\b': ios_printf(stream, "\\b"); break; + case '\f': ios_printf(stream, "\\f"); break; + case '\n': ios_printf(stream, "\\n"); break; + case '\r': ios_printf(stream, "\\r"); break; + case '\t': ios_printf(stream, "\\t"); break; + default: + if ('\x00' <= *c && *c <= '\x1f') { + ios_printf(stream, "\\u%04x", (int)*c); + } else { + ios_printf(stream, "%c", *c); + } + } + } + ios_printf(stream, "\""); +} + + +// Edges +// "edge_fields": +// [ "type", "name_or_index", "to_node" ] +// mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L2598-L2601 + +struct Edge { + size_t type; // These *must* match the Enums on the JS side; control interpretation of name_or_index. 
+ size_t name_or_index; // name of the field (for objects/modules) or index of array + size_t to_node; + + // Book-keeping fields (not used for serialization) +}; + +// Nodes +// "node_fields": +// [ "type", "name", "id", "self_size", "edge_count", "trace_node_id", "detachedness" ] +// mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L2568-L2575 + +const int k_node_number_of_fields = 7; +struct Node { + size_t type; // index into snapshot->node_types + string name; + size_t id; // This should be a globally-unique counter, but we use the memory address + size_t self_size; + size_t trace_node_id; // This is ALWAYS 0 in Javascript heap-snapshots. + // whether the from_node is attached or dettached from the main application state + // TODO: .... meaning not yet understood. + // https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/include/v8-profiler.h#L739-L745 + int detachedness; // 0 - unknown, 1 - attached, 2 - detached + + // Book-keeping fields (not used for serialization) + vector edges; // For asserting that we built the edges in the right order +}; + +struct StringTable { + typedef unordered_map MapType; + + MapType map; + vector strings; + + StringTable() {} + StringTable(std::initializer_list strs) : strings(strs) { + for (const auto& str : strs) { + map.insert({str, map.size()}); + } + } + + size_t find_or_create_string_id(string key) { + auto val = map.find(key); + if (val == map.end()) { + val = map.insert(val, {key, map.size()}); + strings.push_back(key); + } + return val->second; + } + + void print_json_array(ios_t *stream, bool newlines) { + ios_printf(stream, "["); + bool first = true; + for (const auto &str : strings) { + if (first) { + first = false; + } else { + ios_printf(stream, newlines ? 
",\n" : ","); + } + print_str_escape_json(stream, str); + } + ios_printf(stream, "]"); + } +}; + +struct HeapSnapshot { +public: + +// private: + vector nodes; + // edges are stored on each from_node + + StringTable names; + StringTable node_types; + StringTable edge_types; + unordered_map node_ptr_to_index_map; + + size_t num_edges = 0; // For metadata, updated as you add each edge. Needed because edges owned by nodes. +}; + +// global heap snapshot, mutated by garbage collector +// when snapshotting is on. +HeapSnapshot *g_snapshot = nullptr; + + +void serialize_heap_snapshot(ios_t *stream, HeapSnapshot &snapshot); +static inline void _record_gc_edge(const char *node_type, const char *edge_type, + jl_value_t *a, jl_value_t *b, size_t name_or_index); +void _record_gc_just_edge(const char *edge_type, Node &from_node, size_t to_idx, size_t name_or_idx); +void _add_internal_root(HeapSnapshot *snapshot); + + +JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream) { + // Enable snapshotting + HeapSnapshot snapshot; + g_snapshot = &snapshot; + gc_heap_snapshot_enabled = true; + + _add_internal_root(&snapshot); + + // Initialize the GC's heuristics, so that JL_GC_FULL will work correctly. :) + while (gc_num.pause < 2) { + jl_gc_collect(JL_GC_AUTO); + } + + // Do a full GC mark (and incremental sweep), which will invoke our callbacks on `g_snapshot` + jl_gc_collect(JL_GC_FULL); + + // Disable snapshotting + gc_heap_snapshot_enabled = false; + g_snapshot = nullptr; + + // When we return, the snapshot is full + // Dump the snapshot + serialize_heap_snapshot((ios_t*)stream, snapshot); +} + +// adds a node at id 0 which is the "uber root": +// a synthetic node which points to all the GC roots. 
+void _add_internal_root(HeapSnapshot *snapshot) {
+    Node internal_root{
+        snapshot->node_types.find_or_create_string_id("synthetic"),
+        "(internal root)", // name
+        0, // id
+        1, // size
+
+        0, // size_t trace_node_id (unused)
+        0, // int detachedness;  // 0 - unknown,  1 - attached;  2 - detached
+
+        // outgoing edges
+        vector<Edge>(),
+    };
+    snapshot->nodes.push_back(internal_root);
+}
+
+// mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L597-L597
+// Returns the index in g_snapshot->nodes of the node for `a`, creating the node
+// (and registering its address in node_ptr_to_index_map) the first time `a` is seen.
+size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT {
+    auto val = g_snapshot->node_ptr_to_index_map.find((void*)a);
+    if (val != g_snapshot->node_ptr_to_index_map.end()) {
+        return val->second;
+    }
+
+    // Insert a new Node
+    size_t self_size = 0;
+    // NOTE(review): the initial value and the four assignments below are all empty
+    // strings, which makes those assignments no-ops. Presumably they once held
+    // distinct placeholder labels -- confirm against the upstream source.
+    string name = "";
+    string node_type = "object";
+
+    if (a == (jl_value_t*)jl_malloc_tag) {
+        name = "";
+    } else {
+        jl_datatype_t* type = (jl_datatype_t*)jl_typeof(a);
+
+        if ((uintptr_t)type < 4096U) {
+            name = "";
+        } else if (type == (jl_datatype_t*)jl_buff_tag) {
+            name = "";
+        } else if (type == (jl_datatype_t*)jl_malloc_tag) {
+            name = "";
+        } else if (jl_is_string(a)) {
+            node_type = "string";
+            name = jl_string_data(a);
+            self_size = jl_string_len(a);
+        } else if (jl_is_symbol(a)) {
+            node_type = "symbol";
+            name = jl_symbol_name((jl_sym_t*)a);
+            self_size = name.length();
+        } else if (jl_is_datatype(type)) {
+            self_size = jl_is_array_type(type)
+                ? jl_array_nbytes((jl_array_t*)a)
+                : (size_t)jl_datatype_size(type);
+
+            // print full type
+            // TODO(PR): Is it possible to use a variable size string here, instead??
+            ios_t str_;
+            ios_mem(&str_, 1048576); // 1 MiB
+            JL_STREAM* str = (JL_STREAM*)&str_;
+
+            jl_static_show(str, (jl_value_t*)type);
+
+            name = string((const char*)str_.buf, str_.size);
+            ios_close(&str_);
+        }
+    }
+
+    auto node_idx = g_snapshot->nodes.size();
+    g_snapshot->node_ptr_to_index_map.insert(val, {a, node_idx});
+
+    Node from_node{
+        // We pick a default type here, which will be set for the _targets_ of edges.
+        // TODO: What's a good default?
+        g_snapshot->node_types.find_or_create_string_id(node_type), // size_t type;
+        name, // string name;
+        (size_t)a,     // size_t id;
+        // We add 1 to self-size for the type tag that all heap-allocated objects have.
+        // Also because the Chrome Snapshot viewer ignores size-0 leaves!
+        self_size + 1, // size_t self_size;
+
+        0,             // size_t trace_node_id (unused)
+        0,             // int detachedness;  // 0 - unknown,  1 - attached;  2 - detached
+
+        // outgoing edges
+        vector<Edge>(),
+    };
+    g_snapshot->nodes.push_back(from_node);
+
+    return node_idx;
+}
+
+// One step of a field path: the type that owns the field, and the field's label.
+typedef pair<jl_datatype_t*, string> inlineallocd_field_type_t;
+
+// TODO: remove this
+static bool debug_log = false;
+
+// Searches the fields of `objtype` (laid out starting at `obj`) for the one that
+// `slot` belongs to, descending into inline-stored struct fields. On success the
+// path is appended to `out` innermost-first and true is returned.
+bool _fieldpath_for_slot_helper(
+    vector<inlineallocd_field_type_t>& out, jl_datatype_t *objtype,
+    void *obj, void *slot)
+{
+    int nf = (int)jl_datatype_nfields(objtype);
+    jl_svec_t *field_names = jl_field_names(objtype);
+    if (debug_log) {
+        jl_((jl_value_t*)objtype);
+        jl_printf(JL_STDERR, "obj: %p, slot: %p, nf: %d\n", obj, (void*)slot, nf);
+    }
+    for (int i = 0; i < nf; i++) {
+        jl_datatype_t *field_type = (jl_datatype_t*)jl_field_type(objtype, i);
+        void *fieldaddr = (char*)obj + jl_field_offset(objtype, i);
+        string field_name;
+        // TODO: NamedTuples should maybe have field names? Maybe another way to get them?
+        if (jl_is_tuple_type(objtype) || jl_is_namedtuple_type(objtype)) {
+            // Positional label, e.g. "[0]"
+            field_name = "[" + std::to_string(i) + "]";
+        } else {
+            jl_sym_t *name = (jl_sym_t*)jl_svecref(field_names, i);
+            field_name = jl_symbol_name(name);
+        }
+        if (debug_log) {
+            jl_printf(JL_STDERR, "%d - field_name: %s fieldaddr: %p\n", i, field_name.c_str(), fieldaddr);
+        }
+        if (fieldaddr >= slot) {
+            out.push_back(inlineallocd_field_type_t(objtype, field_name));
+            return true;
+        }
+        // If the field is an inline-allocated struct, look inside it.
+        // The jl_is_datatype guard keeps us from reading field layout off a field
+        // type that is not a datatype (presumably possible for inline-stored
+        // unions -- TODO confirm); such a type has no fields to walk anyway.
+        if (jl_is_datatype((jl_value_t*)field_type) &&
+            jl_stored_inline((jl_value_t*)field_type)) {
+            bool found = _fieldpath_for_slot_helper(out, field_type, fieldaddr, slot);
+            if (found) {
+                out.push_back(inlineallocd_field_type_t(field_type, field_name));
+                return true;
+            }
+        }
+    }
+    return false;
+}
+
+// Computes the field path from `obj` down to `slot`. Prints a warning to stderr
+// when no field matches, in which case the returned vector is empty.
+vector<inlineallocd_field_type_t> _fieldpath_for_slot(jl_value_t *obj, void *slot) {
+    jl_datatype_t *vt = (jl_datatype_t*)jl_typeof(obj);
+
+    vector<inlineallocd_field_type_t> result;
+    bool found = _fieldpath_for_slot_helper(result, vt, obj, slot);
+
+    debug_log = false;
+
+    // TODO: maybe don't need the return value here actually...?
+    if (!found) {
+        // TODO: Debug these failures. Some of them seem really wrong, like with the slot
+        // being _kilobytes_ past the start of the object for an object with 1 pointer and 1
+        // field...
+        jl_printf(JL_STDERR, "WARNING: No fieldpath found for obj: %p slot: %p ", (void*)obj, (void*)slot);
+        jl_datatype_t* type = (jl_datatype_t*)jl_typeof(obj);
+        if (jl_is_datatype(type)) {
+            jl_printf(JL_STDERR, "typeof: ");
+            jl_static_show(JL_STDERR, (jl_value_t*)type);
+        }
+        jl_printf(JL_STDERR, "\n");
+    }
+    // NOTE THE RETURNED VECTOR IS REVERSED
+    return result;
+}
+
+
+// Records `root` as a GC root: adds an "internal" edge labelled `name` from the
+// synthetic root node (nodes.front()) to the node for `root`.
+void _gc_heap_snapshot_record_root(jl_value_t *root, char *name) JL_NOTSAFEPOINT {
+    // Create the target first; only then take a reference into `nodes`, because
+    // creating a node may reallocate the vector.
+    auto to_node_idx = record_node_to_gc_snapshot(root);
+
+    auto &internal_root = g_snapshot->nodes.front();
+    auto edge_label = g_snapshot->names.find_or_create_string_id(name);
+
+    _record_gc_just_edge("internal", internal_root, to_node_idx, edge_label);
+}
+
+// Add a node to the heap snapshot representing a Julia stack frame.
+// Each task points at a stack frame, which points at the stack frame of
+// the function it's currently calling, forming a linked list.
+// Stack frame nodes point at the objects they have as local variables.
+size_t _record_stack_frame_node(HeapSnapshot *snapshot, jl_gcframe_t *frame) { + auto val = g_snapshot->node_ptr_to_index_map.find((void*)frame); + if (val != g_snapshot->node_ptr_to_index_map.end()) { + return val->second; + } + + Node frame_node{ + snapshot->node_types.find_or_create_string_id("synthetic"), + "(stack frame)", // name + (size_t)frame, // id + 1, // size + + 0, // size_t trace_node_id (unused) + 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached + + // outgoing edges + vector(), + }; + + auto node_idx = snapshot->nodes.size(); + snapshot->node_ptr_to_index_map.insert(val, {frame, node_idx}); + snapshot->nodes.push_back(frame_node); + + return node_idx; +} + +void _gc_heap_snapshot_record_frame_to_object_edge(jl_gcframe_t *from, jl_value_t *to) JL_NOTSAFEPOINT { + auto from_node_idx = _record_stack_frame_node(g_snapshot, from); + Node &from_node = g_snapshot->nodes[from_node_idx]; + auto to_idx = record_node_to_gc_snapshot(to); + + // TODO: would be cool to get the name of the local var + auto name_idx = g_snapshot->names.find_or_create_string_id("local var"); + _record_gc_just_edge("internal", from_node, to_idx, name_idx); +} + +void _gc_heap_snapshot_record_task_to_frame_edge(jl_task_t *from, jl_gcframe_t *to) JL_NOTSAFEPOINT { + auto from_node_idx = record_node_to_gc_snapshot((jl_value_t*)from); + Node &from_node = g_snapshot->nodes[from_node_idx]; + _record_stack_frame_node(g_snapshot, to); + auto to_node_idx = g_snapshot->node_ptr_to_index_map[to]; + + auto name_idx = g_snapshot->names.find_or_create_string_id("stack"); + _record_gc_just_edge("internal", from_node, to_node_idx, name_idx); +} + +void _gc_heap_snapshot_record_frame_to_frame_edge(jl_gcframe_t *from, jl_gcframe_t *to) JL_NOTSAFEPOINT { + auto from_node_idx = _record_stack_frame_node(g_snapshot, from); + Node &from_node = g_snapshot->nodes[from_node_idx]; + auto to_node_idx = _record_stack_frame_node(g_snapshot, to); + + auto name_idx = 
g_snapshot->names.find_or_create_string_id("next frame"); + _record_gc_just_edge("internal", from_node, to_node_idx, name_idx); +} + +void _gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT { + if (!g_snapshot) { + return; + } + _record_gc_edge("array", "element", from, to, index); +} + +void _gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_value_t *to, char *name) JL_NOTSAFEPOINT { + //jl_printf(JL_STDERR, "module: %p binding:%p name:%s\n", from, to, name); + _record_gc_edge("object", "property", (jl_value_t *)from, to, + g_snapshot->names.find_or_create_string_id(name)); +} + +void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, void* slot) JL_NOTSAFEPOINT { + jl_datatype_t *type = (jl_datatype_t*)jl_typeof(from); + + auto field_paths = _fieldpath_for_slot(from, slot); + // Build the new field name by joining the strings, and/or use the struct + field names + // to create a bunch of edges + nodes + // (iterate the vector in reverse - the last element is the first path) + // TODO: Prefer to create intermediate edges and nodes instead of a combined string path. + string path; + for (auto it = field_paths.rbegin(); it != field_paths.rend(); ++it) { + // ... 
+ path += it->second; + if ( it + 1 != field_paths.rend() ) { + path += "."; + } + } + + _record_gc_edge("object", "property", from, to, + g_snapshot->names.find_or_create_string_id(path)); +} + +void _gc_heap_snapshot_record_internal_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT { + // TODO: probably need to inline this here and make some changes + _record_gc_edge("object", "internal", from, to, + g_snapshot->names.find_or_create_string_id("")); +} + +void _gc_heap_snapshot_record_hidden_edge(jl_value_t *from, size_t bytes) JL_NOTSAFEPOINT { + // TODO: probably need to inline this here and make some changes + _record_gc_edge("native", "hidden", from, (jl_value_t *)jl_malloc_tag, + g_snapshot->names.find_or_create_string_id("")); + + // Add the size to the "unknown malloc" tag + g_snapshot->nodes[g_snapshot->node_ptr_to_index_map[(jl_value_t*)jl_malloc_tag]].self_size += bytes; +} + +static inline void _record_gc_edge(const char *node_type, const char *edge_type, + jl_value_t *a, jl_value_t *b, size_t name_or_idx) JL_NOTSAFEPOINT +{ + auto from_node_idx = record_node_to_gc_snapshot(a); + auto to_node_idx = record_node_to_gc_snapshot(b); + + auto &from_node = g_snapshot->nodes[from_node_idx]; + from_node.type = g_snapshot->node_types.find_or_create_string_id(node_type); + + _record_gc_just_edge(edge_type, from_node, to_node_idx, name_or_idx); +} + +void _record_gc_just_edge(const char *edge_type, Node &from_node, size_t to_idx, size_t name_or_idx) { + from_node.edges.push_back(Edge{ + g_snapshot->edge_types.find_or_create_string_id(edge_type), + name_or_idx, // edge label + to_idx, // to + }); + + g_snapshot->num_edges += 1; +} + +void serialize_heap_snapshot(ios_t *stream, HeapSnapshot &snapshot) { + // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L2567-L2567 + ios_printf(stream, "{\"snapshot\":{"); + ios_printf(stream, "\"meta\":{"); + ios_printf(stream, 
"\"node_fields\":[\"type\",\"name\",\"id\",\"self_size\",\"edge_count\",\"trace_node_id\",\"detachedness\"],"); + ios_printf(stream, "\"node_types\":["); + snapshot.node_types.print_json_array(stream, false); + ios_printf(stream, ","); + ios_printf(stream, "\"string\", \"number\", \"number\", \"number\", \"number\", \"number\"],"); + ios_printf(stream, "\"edge_fields\":[\"type\",\"name_or_index\",\"to_node\"],"); + ios_printf(stream, "\"edge_types\":["); + snapshot.edge_types.print_json_array(stream, false); + ios_printf(stream, ","); + ios_printf(stream, "\"string_or_number\",\"from_node\"]"); + ios_printf(stream, "},\n"); // end "meta" + ios_printf(stream, "\"node_count\":%zu,", snapshot.nodes.size()); + ios_printf(stream, "\"edge_count\":%zu", snapshot.num_edges); + ios_printf(stream, "},\n"); // end "snapshot" + + ios_printf(stream, "\"nodes\":["); + bool first_node = true; + for (const auto &from_node : snapshot.nodes) { + if (first_node) { + first_node = false; + } else { + ios_printf(stream, ","); + } + // ["type","name","id","self_size","edge_count","trace_node_id","detachedness"] + ios_printf(stream, "%zu", from_node.type); + ios_printf(stream, ",%zu", snapshot.names.find_or_create_string_id(from_node.name)); + ios_printf(stream, ",%zu", from_node.id); + ios_printf(stream, ",%zu", from_node.self_size); + ios_printf(stream, ",%zu", from_node.edges.size()); + ios_printf(stream, ",%zu", from_node.trace_node_id); + ios_printf(stream, ",%d", from_node.detachedness); + ios_printf(stream, "\n"); + } + ios_printf(stream, "],\n"); + + ios_printf(stream, "\"edges\":["); + bool first_edge = true; + for (const auto &from_node : snapshot.nodes) { + for (const auto &edge : from_node.edges) { + if (first_edge) { + first_edge = false; + } else { + ios_printf(stream, ","); + } + ios_printf(stream, "%zu", edge.type); + ios_printf(stream, ",%zu", edge.name_or_index); + ios_printf(stream, ",%zu", edge.to_node * k_node_number_of_fields); + ios_printf(stream, "\n"); + } + } + 
ios_printf(stream, "],\n"); // end "edges" + + ios_printf(stream, "\"strings\":"); + + snapshot.names.print_json_array(stream, true); + + ios_printf(stream, "}"); +} diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h new file mode 100644 index 0000000000000..05bb9d48f0c4d --- /dev/null +++ b/src/gc-heap-snapshot.h @@ -0,0 +1,93 @@ +// This file is a part of Julia. License is MIT: https://julialang.org/license + +#ifndef JL_GC_HEAP_SNAPSHOT_H +#define JL_GC_HEAP_SNAPSHOT_H + +#include "julia.h" +#include "ios.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +// --------------------------------------------------------------------- +// Functions to call from GC when heap snapshot is enabled +// --------------------------------------------------------------------- +void _gc_heap_snapshot_record_root(jl_value_t *root, char *name) JL_NOTSAFEPOINT; +void _gc_heap_snapshot_record_frame_to_object_edge(jl_gcframe_t *from, jl_value_t *to) JL_NOTSAFEPOINT; +void _gc_heap_snapshot_record_task_to_frame_edge(jl_task_t *from, jl_gcframe_t *to) JL_NOTSAFEPOINT; +void _gc_heap_snapshot_record_frame_to_frame_edge(jl_gcframe_t *from, jl_gcframe_t *to) JL_NOTSAFEPOINT; +void _gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT; +void _gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_value_t *to, char *name) JL_NOTSAFEPOINT; +void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, void* slot) JL_NOTSAFEPOINT; +// Used for objects managed by GC, but which aren't exposed in the julia object, so have no +// field or index. i.e. they're not reacahable from julia code, but we _will_ hit them in +// the GC mark phase (so we can check their type tag to get the size). 
+void _gc_heap_snapshot_record_internal_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT; +// Used for objects manually allocated in C (outside julia GC), to still tell the heap snapshot about the +// size of the object, even though we're never going to mark that object. +void _gc_heap_snapshot_record_hidden_edge(jl_value_t *from, size_t bytes) JL_NOTSAFEPOINT; + + +extern int gc_heap_snapshot_enabled; +extern int prev_sweep_full; // defined in gc.c + +static inline void gc_heap_snapshot_record_frame_to_object_edge(jl_gcframe_t *from, jl_value_t *to) { + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { + _gc_heap_snapshot_record_frame_to_object_edge(from, to); + } +} +static inline void gc_heap_snapshot_record_task_to_frame_edge(jl_task_t *from, jl_gcframe_t *to) { + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { + _gc_heap_snapshot_record_task_to_frame_edge(from, to); + } +} +static inline void gc_heap_snapshot_record_frame_to_frame_edge(jl_gcframe_t *from, jl_gcframe_t *to) { + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { + _gc_heap_snapshot_record_frame_to_frame_edge(from, to); + } +} +static inline void gc_heap_snapshot_record_root(jl_value_t *root, char *name) JL_NOTSAFEPOINT { + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { + _gc_heap_snapshot_record_root(root, name); + } +} +static inline void gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT { + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { + _gc_heap_snapshot_record_array_edge(from, to, index); + } +} +static inline void gc_heap_snapshot_record_module_edge(jl_module_t *from, jl_value_t *to, char *name) JL_NOTSAFEPOINT { + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { + _gc_heap_snapshot_record_module_edge(from, to, name); + } +} +static inline void gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, void* slot) JL_NOTSAFEPOINT { + if 
(__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { + _gc_heap_snapshot_record_object_edge(from, to, slot); + } +} +static inline void gc_heap_snapshot_record_internal_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT { + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { + _gc_heap_snapshot_record_internal_edge(from, to); + } +} +static inline void gc_heap_snapshot_record_hidden_edge(jl_value_t *from, size_t bytes) JL_NOTSAFEPOINT { + if (__unlikely(gc_heap_snapshot_enabled && prev_sweep_full)) { + _gc_heap_snapshot_record_hidden_edge(from, bytes); + } +} + +// --------------------------------------------------------------------- +// Functions to call from Julia to take heap snapshot +// --------------------------------------------------------------------- +JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream); + + +#ifdef __cplusplus +} +#endif + + +#endif // JL_GC_HEAP_SNAPSHOT_H diff --git a/src/gc.c b/src/gc.c index 56b2c31cbe7f1..f449138f179e3 100644 --- a/src/gc.c +++ b/src/gc.c @@ -646,7 +646,7 @@ static int mark_reset_age = 0; static int64_t scanned_bytes; // young bytes scanned while marking static int64_t perm_scanned_bytes; // old bytes scanned while marking -static int prev_sweep_full = 1; +int prev_sweep_full = 1; #define inc_sat(v,s) v = (v) >= s ? 
s : (v)+1 @@ -1817,9 +1817,12 @@ STATIC_INLINE int gc_mark_scan_objarray(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, (void)jl_assume(objary == (gc_mark_objarray_t*)sp->data); for (; begin < end; begin += objary->step) { *pnew_obj = *begin; - if (*pnew_obj) + if (*pnew_obj) { verify_parent2("obj array", objary->parent, begin, "elem(%d)", gc_slot_to_arrayidx(objary->parent, begin)); + gc_heap_snapshot_record_array_edge(objary->parent, *begin, + gc_slot_to_arrayidx(objary->parent, begin)); + } if (!gc_try_setmark(*pnew_obj, &objary->nptr, ptag, pbits)) continue; begin += objary->step; @@ -1853,9 +1856,12 @@ STATIC_INLINE int gc_mark_scan_array8(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, for (; elem_begin < elem_end; elem_begin++) { jl_value_t **slot = &begin[*elem_begin]; *pnew_obj = *slot; - if (*pnew_obj) + if (*pnew_obj) { verify_parent2("array", ary8->elem.parent, slot, "elem(%d)", gc_slot_to_arrayidx(ary8->elem.parent, begin)); + gc_heap_snapshot_record_array_edge(ary8->elem.parent, *slot, + gc_slot_to_arrayidx(ary8->elem.parent, begin)); + } if (!gc_try_setmark(*pnew_obj, &ary8->elem.nptr, ptag, pbits)) continue; elem_begin++; @@ -1901,9 +1907,12 @@ STATIC_INLINE int gc_mark_scan_array16(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, for (; elem_begin < elem_end; elem_begin++) { jl_value_t **slot = &begin[*elem_begin]; *pnew_obj = *slot; - if (*pnew_obj) + if (*pnew_obj) { verify_parent2("array", ary16->elem.parent, slot, "elem(%d)", gc_slot_to_arrayidx(ary16->elem.parent, begin)); + gc_heap_snapshot_record_array_edge(ary16->elem.parent, *slot, + gc_slot_to_arrayidx(ary16->elem.parent, begin)); + } if (!gc_try_setmark(*pnew_obj, &ary16->elem.nptr, ptag, pbits)) continue; elem_begin++; @@ -1947,9 +1956,11 @@ STATIC_INLINE int gc_mark_scan_obj8(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, gc_mark for (; begin < end; begin++) { jl_value_t **slot = &((jl_value_t**)parent)[*begin]; *pnew_obj = *slot; - if (*pnew_obj) + if (*pnew_obj) { verify_parent2("object", parent, slot, "field(%d)", 
gc_slot_to_fieldidx(parent, slot)); + gc_heap_snapshot_record_object_edge(parent, *slot, slot); + } if (!gc_try_setmark(*pnew_obj, &obj8->nptr, ptag, pbits)) continue; begin++; @@ -1980,9 +1991,12 @@ STATIC_INLINE int gc_mark_scan_obj16(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, gc_mar for (; begin < end; begin++) { jl_value_t **slot = &((jl_value_t**)parent)[*begin]; *pnew_obj = *slot; - if (*pnew_obj) + if (*pnew_obj) { verify_parent2("object", parent, slot, "field(%d)", gc_slot_to_fieldidx(parent, slot)); + // TODO: Should this be *parent? Given the way it's used above? + gc_heap_snapshot_record_object_edge(parent, *slot, slot); + } if (!gc_try_setmark(*pnew_obj, &obj16->nptr, ptag, pbits)) continue; begin++; @@ -2013,9 +2027,11 @@ STATIC_INLINE int gc_mark_scan_obj32(jl_ptls_t ptls, jl_gc_mark_sp_t *sp, gc_mar for (; begin < end; begin++) { jl_value_t **slot = &((jl_value_t**)parent)[*begin]; *pnew_obj = *slot; - if (*pnew_obj) + if (*pnew_obj) { verify_parent2("object", parent, slot, "field(%d)", gc_slot_to_fieldidx(parent, slot)); + gc_heap_snapshot_record_object_edge(parent, *slot, slot); + } if (!gc_try_setmark(*pnew_obj, &obj32->nptr, ptag, pbits)) continue; begin++; @@ -2302,6 +2318,7 @@ stack: { } if (!gc_try_setmark(new_obj, &nptr, &tag, &bits)) continue; + gc_heap_snapshot_record_frame_to_object_edge(s, new_obj); i++; if (i < nr) { // Haven't done with this one yet. 
Update the content and push it back @@ -2319,7 +2336,9 @@ stack: { goto mark; } s = (jl_gcframe_t*)gc_read_stack(&s->prev, offset, lb, ub); + // walk up one stack frame if (s != 0) { + gc_heap_snapshot_record_frame_to_frame_edge(stack->s, s); stack->s = s; i = 0; uintptr_t new_nroots = gc_read_stack(&s->nroots, offset, lb, ub); @@ -2403,12 +2422,16 @@ module_binding: { } void *vb = jl_astaggedvalue(b); verify_parent1("module", binding->parent, &vb, "binding_buff"); + // Record the size used for the box for non-const bindings + gc_heap_snapshot_record_internal_edge(binding->parent, b); (void)vb; jl_value_t *value = jl_atomic_load_relaxed(&b->value); jl_value_t *globalref = jl_atomic_load_relaxed(&b->globalref); if (value) { verify_parent2("module", binding->parent, &b->value, "binding(%s)", jl_symbol_name(b->name)); + gc_heap_snapshot_record_module_edge(binding->parent, value, + jl_symbol_name(b->name)); if (gc_try_setmark(value, &binding->nptr, &tag, &bits)) { new_obj = value; begin += 2; @@ -2540,6 +2563,7 @@ mark: { if (flags.how == 1) { void *val_buf = jl_astaggedvalue((char*)a->data - a->offset * a->elsize); verify_parent1("array", new_obj, &val_buf, "buffer ('loc' addr is meaningless)"); + gc_heap_snapshot_record_internal_edge(new_obj, jl_valueof(val_buf)); (void)val_buf; gc_setmark_buf_(ptls, (char*)a->data - a->offset * a->elsize, bits, jl_array_nbytes(a)); @@ -2548,6 +2572,7 @@ mark: { if (update_meta || foreign_alloc) { objprofile_count(jl_malloc_tag, bits == GC_OLD_MARKED, jl_array_nbytes(a)); + gc_heap_snapshot_record_hidden_edge(new_obj, jl_array_nbytes(a)); if (bits == GC_OLD_MARKED) { ptls->gc_cache.perm_scanned_bytes += jl_array_nbytes(a); } @@ -2559,6 +2584,8 @@ mark: { else if (flags.how == 3) { jl_value_t *owner = jl_array_data_owner(a); uintptr_t nptr = (1 << 2) | (bits & GC_OLD); + // TODO: Keep an eye on the edge type here, we're _pretty sure_ it's right.. 
+ gc_heap_snapshot_record_internal_edge(new_obj, owner); int markowner = gc_try_setmark(owner, &nptr, &tag, &bits); gc_mark_push_remset(ptls, new_obj, nptr); if (markowner) { @@ -2655,8 +2682,12 @@ mark: { } #ifdef COPY_STACKS void *stkbuf = ta->stkbuf; - if (stkbuf && ta->copy_stack) + if (stkbuf && ta->copy_stack) { gc_setmark_buf_(ptls, stkbuf, bits, ta->bufsz); + // TODO: attribute size of stack + // TODO: edge to stack data + // TODO: synthetic node for stack data (how big is it?) + } #endif jl_gcframe_t *s = ta->gcstack; size_t nroots; @@ -2675,6 +2706,8 @@ mark: { #endif if (s) { nroots = gc_read_stack(&s->nroots, offset, lb, ub); + gc_heap_snapshot_record_task_to_frame_edge(ta, s); + assert(nroots <= UINT32_MAX); gc_mark_stackframe_t stackdata = {s, 0, (uint32_t)nroots, offset, lb, ub}; gc_mark_stack_push(&ptls->gc_cache, &sp, gc_mark_laddr(stack), @@ -2778,13 +2811,21 @@ static void jl_gc_queue_thread_local(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp jl_ptls_t ptls2) { gc_mark_queue_obj(gc_cache, sp, jl_atomic_load_relaxed(&ptls2->current_task)); + gc_heap_snapshot_record_root(ptls2->current_task, "current task"); gc_mark_queue_obj(gc_cache, sp, ptls2->root_task); - if (ptls2->next_task) + gc_heap_snapshot_record_root(ptls2->root_task, "root task"); + if (ptls2->next_task) { gc_mark_queue_obj(gc_cache, sp, ptls2->next_task); - if (ptls2->previous_task) // shouldn't be necessary, but no reason not to + gc_heap_snapshot_record_root(ptls2->next_task, "next task"); + } + if (ptls2->previous_task) { // shouldn't be necessary, but no reason not to gc_mark_queue_obj(gc_cache, sp, ptls2->previous_task); - if (ptls2->previous_exception) + gc_heap_snapshot_record_root(ptls2->previous_task, "previous task"); + } + if (ptls2->previous_exception) { gc_mark_queue_obj(gc_cache, sp, ptls2->previous_exception); + gc_heap_snapshot_record_root(ptls2->previous_exception, "previous exception"); + } } void jl_gc_mark_enqueued_tasks(jl_gc_mark_cache_t *gc_cache, 
jl_gc_mark_sp_t *sp); @@ -2795,8 +2836,10 @@ static void mark_roots(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp) { // modules gc_mark_queue_obj(gc_cache, sp, jl_main_module); + gc_heap_snapshot_record_root(jl_main_module, "main_module"); // tasks + // TODO: is this dead code? jl_gc_mark_enqueued_tasks(gc_cache, sp); // invisible builtin values @@ -2807,16 +2850,21 @@ static void mark_roots(jl_gc_mark_cache_t *gc_cache, jl_gc_mark_sp_t *sp) for (size_t i = 0; i < jl_current_modules.size; i += 2) { if (jl_current_modules.table[i + 1] != HT_NOTFOUND) { gc_mark_queue_obj(gc_cache, sp, jl_current_modules.table[i]); + gc_heap_snapshot_record_root(jl_current_modules.table[i], "current_module"); } } gc_mark_queue_obj(gc_cache, sp, jl_anytuple_type_type); for (size_t i = 0; i < N_CALL_CACHE; i++) { jl_typemap_entry_t *v = jl_atomic_load_relaxed(&call_cache[i]); - if (v != NULL) + if (v != NULL) { gc_mark_queue_obj(gc_cache, sp, v); + gc_heap_snapshot_record_root(v, "type_map"); + } } - if (jl_all_methods != NULL) + if (jl_all_methods != NULL) { gc_mark_queue_obj(gc_cache, sp, jl_all_methods); + gc_heap_snapshot_record_root(jl_all_methods, "all_methods"); + } if (_jl_debug_method_invalidation != NULL) gc_mark_queue_obj(gc_cache, sp, _jl_debug_method_invalidation); @@ -3028,8 +3076,10 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) // 2.1. mark every object in the `last_remsets` and `rem_binding` jl_gc_queue_remset(gc_cache, &sp, ptls2); // 2.2. mark every thread local root + // TODO: treat these as roots jl_gc_queue_thread_local(gc_cache, &sp, ptls2); // 2.3. 
mark any managed objects in the backtrace buffer + // TODO: treat these as roots jl_gc_queue_bt_buf(gc_cache, &sp, ptls2); } diff --git a/src/gc.h b/src/gc.h index 544486d933e10..11f7ce5b805ff 100644 --- a/src/gc.h +++ b/src/gc.h @@ -24,6 +24,7 @@ #endif #endif #include "julia_assert.h" +#include "gc-heap-snapshot.h" #include "gc-alloc-profiler.h" #ifdef __cplusplus @@ -632,8 +633,10 @@ extern int gc_verifying; #define verify_parent2(ty,obj,slot,arg1,arg2) do {} while (0) #define gc_verifying (0) #endif -int gc_slot_to_fieldidx(void *_obj, void *slot); -int gc_slot_to_arrayidx(void *_obj, void *begin); + + +int gc_slot_to_fieldidx(void *_obj, void *slot) JL_NOTSAFEPOINT; +int gc_slot_to_arrayidx(void *_obj, void *begin) JL_NOTSAFEPOINT; NOINLINE void gc_mark_loop_unwind(jl_ptls_t ptls, jl_gc_mark_sp_t sp, int pc_offset); #ifdef GC_DEBUG_ENV