From d4e8361fb6aff72ed70f9ebd5efdd8280e682633 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Fri, 29 Sep 2023 13:12:58 -0600 Subject: [PATCH 01/13] Streaming the heap snapshot! This should prevent the engine from OOMing while recording the snapshot! Now we just need to sample the files, either online, before downloading, or offline after downloading :) If we're gonna do it offline, we'll want to gzip the files before downloading them. --- src/gc-heap-snapshot.cpp | 218 ++++++++++++++++------------------ src/gc-heap-snapshot.h | 3 +- stdlib/Profile/src/Profile.jl | 28 ++++- 3 files changed, 134 insertions(+), 115 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index b1401653d99ff..6b6a4e3138ece 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -55,6 +55,7 @@ void print_str_escape_json(ios_t *stream, StringRef s) struct Edge { size_t type; // These *must* match the Enums on the JS side; control interpretation of name_or_index. size_t name_or_index; // name of the field (for objects/modules) or index of array + size_t from_node; size_t to_node; }; @@ -73,7 +74,6 @@ struct Node { // whether the from_node is attached or dettached from the main application state // https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/include/v8-profiler.h#L739-L745 int detachedness; // 0 - unknown, 1 - attached, 2 - detached - vector edges; ~Node() JL_NOTSAFEPOINT = default; }; @@ -106,15 +106,22 @@ struct StringTable { }; struct HeapSnapshot { - vector nodes; - // edges are stored on each from_node - StringTable names; StringTable node_types; StringTable edge_types; DenseMap node_ptr_to_index_map; - size_t num_edges = 0; // For metadata, updated as you add each edge. Needed because edges owned by nodes. + size_t num_nodes = 0; // Since we stream out to files, + size_t num_edges = 0; // we need to track the counts here. 
+ + Node internal_root; + + // Used for streaming + ios_t *nodes; + ios_t *edges; + ios_t *strings; + ios_t *json; + }; // global heap snapshot, mutated by garbage collector @@ -123,17 +130,22 @@ int gc_heap_snapshot_enabled = 0; HeapSnapshot *g_snapshot = nullptr; extern jl_mutex_t heapsnapshot_lock; +void final_serialize_heap_snapshot(ios_t *json, ios_t *strings, HeapSnapshot &snapshot, char all_one); void serialize_heap_snapshot(ios_t *stream, HeapSnapshot &snapshot, char all_one); static inline void _record_gc_edge(const char *edge_type, jl_value_t *a, jl_value_t *b, size_t name_or_index) JL_NOTSAFEPOINT; -void _record_gc_just_edge(const char *edge_type, Node &from_node, size_t to_idx, size_t name_or_idx) JL_NOTSAFEPOINT; +void _record_gc_just_edge(const char *edge_type, size_t from_idx, size_t to_idx, size_t name_or_idx) JL_NOTSAFEPOINT; void _add_internal_root(HeapSnapshot *snapshot); -JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream, char all_one) +JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *nodes, ios_t *edges, + ios_t *strings, ios_t *json, char all_one) { HeapSnapshot snapshot; - _add_internal_root(&snapshot); + snapshot.nodes = nodes; + snapshot.edges = edges; + snapshot.strings = strings; + snapshot.json = json; jl_mutex_lock(&heapsnapshot_lock); @@ -141,6 +153,8 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream, char all_one) g_snapshot = &snapshot; gc_heap_snapshot_enabled = true; + _add_internal_root(&snapshot); + // Do a full GC mark (and incremental sweep), which will invoke our callbacks on `g_snapshot` jl_gc_collect(JL_GC_FULL); @@ -152,30 +166,55 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream, char all_one) // When we return, the snapshot is full // Dump the snapshot - serialize_heap_snapshot((ios_t*)stream, snapshot, all_one); + final_serialize_heap_snapshot((ios_t*)json, (ios_t*)strings, snapshot, all_one); +} + +void serialize_node(HeapSnapshot *snapshot, const Node &node) +{ + // 
["type","name","id","self_size","edge_count","trace_node_id","detachedness"] + ios_printf(snapshot->nodes, "%zu,%zu,%zu,%zu,%zu,%zu,%d\n", + node.type, + node.name, + node.id, + node.self_size, + 0, // fake edge count for now + node.trace_node_id, + node.detachedness); + + g_snapshot->num_nodes += 1; +} +void serialize_edge(HeapSnapshot *snapshot, const Edge &edge) +{ + // ["type","name_or_index","to_node"] + ios_printf(snapshot->edges, "%zu,%zu,%zu,%zu\n", + edge.type, + edge.name_or_index, + edge.from_node, // NOTE: Row number (not adjusted for k_node_number_of_fields) + edge.to_node); // NOTE: Row number (not adjusted for k_node_number_of_fields) + g_snapshot->num_edges += 1; } // adds a node at id 0 which is the "uber root": // a synthetic node which points to all the GC roots. void _add_internal_root(HeapSnapshot *snapshot) { - Node internal_root{ + snapshot->internal_root = Node{ snapshot->node_types.find_or_create_string_id("synthetic"), snapshot->names.find_or_create_string_id(""), // name 0, // id 0, // size 0, // size_t trace_node_id (unused) - 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached - vector() // outgoing edges + 0 // int detachedness; // 0 - unknown, 1 - attached; 2 - detached }; - snapshot->nodes.push_back(internal_root); + + serialize_node(snapshot, snapshot->internal_root); } // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L597-L597 // returns the index of the new node size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { - auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(a, g_snapshot->nodes.size())); + auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(a, g_snapshot->num_nodes)); if (!val.second) { return val.first->second; } @@ -245,7 +284,7 @@ size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT name = StringRef((const char*)str_.buf, str_.size); } - g_snapshot->nodes.push_back(Node{ 
+ auto node = Node{ g_snapshot->node_types.find_or_create_string_id(node_type), // size_t type; g_snapshot->names.find_or_create_string_id(name), // size_t name; (size_t)a, // size_t id; @@ -254,8 +293,8 @@ size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT sizeof(void*) + self_size, // size_t self_size; 0, // size_t trace_node_id (unused) 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached - vector() // outgoing edges - }); + }; + serialize_node(g_snapshot, node); if (ios_need_close) ios_close(&str_); @@ -265,20 +304,20 @@ size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT static size_t record_pointer_to_gc_snapshot(void *a, size_t bytes, StringRef name) JL_NOTSAFEPOINT { - auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(a, g_snapshot->nodes.size())); + auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(a, g_snapshot->num_nodes)); if (!val.second) { return val.first->second; } - g_snapshot->nodes.push_back(Node{ + auto node = Node{ g_snapshot->node_types.find_or_create_string_id( "object"), // size_t type; g_snapshot->names.find_or_create_string_id(name), // size_t name; (size_t)a, // size_t id; bytes, // size_t self_size; 0, // size_t trace_node_id (unused) 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached - vector() // outgoing edges - }); + }; + serialize_node(g_snapshot, node); return val.first->second; } @@ -317,13 +356,11 @@ static string _fieldpath_for_slot(void *obj, void *slot) JL_NOTSAFEPOINT void _gc_heap_snapshot_record_root(jl_value_t *root, char *name) JL_NOTSAFEPOINT { - record_node_to_gc_snapshot(root); - - auto &internal_root = g_snapshot->nodes.front(); - auto to_node_idx = g_snapshot->node_ptr_to_index_map[root]; + size_t to_node_idx = record_node_to_gc_snapshot(root); + size_t internal_root_idx = 0; auto edge_label = g_snapshot->names.find_or_create_string_id(name); - _record_gc_just_edge("internal", internal_root, to_node_idx, edge_label); + 
_record_gc_just_edge("internal", internal_root_idx, to_node_idx, edge_label); } // Add a node to the heap snapshot representing a Julia stack frame. @@ -332,20 +369,20 @@ void _gc_heap_snapshot_record_root(jl_value_t *root, char *name) JL_NOTSAFEPOINT // Stack frame nodes point at the objects they have as local variables. size_t _record_stack_frame_node(HeapSnapshot *snapshot, void *frame) JL_NOTSAFEPOINT { - auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(frame, g_snapshot->nodes.size())); + auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(frame, g_snapshot->num_nodes)); if (!val.second) { return val.first->second; } - snapshot->nodes.push_back(Node{ + auto node = Node{ snapshot->node_types.find_or_create_string_id("synthetic"), snapshot->names.find_or_create_string_id("(stack frame)"), // name (size_t)frame, // id 1, // size 0, // size_t trace_node_id (unused) 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached - vector() // outgoing edges - }); + }; + serialize_node(snapshot, node); return val.first->second; } @@ -354,30 +391,27 @@ void _gc_heap_snapshot_record_frame_to_object_edge(void *from, jl_value_t *to) J { auto from_node_idx = _record_stack_frame_node(g_snapshot, (jl_gcframe_t*)from); auto to_idx = record_node_to_gc_snapshot(to); - Node &from_node = g_snapshot->nodes[from_node_idx]; auto name_idx = g_snapshot->names.find_or_create_string_id("local var"); - _record_gc_just_edge("internal", from_node, to_idx, name_idx); + _record_gc_just_edge("internal", from_node_idx, to_idx, name_idx); } void _gc_heap_snapshot_record_task_to_frame_edge(jl_task_t *from, void *to) JL_NOTSAFEPOINT { auto from_node_idx = record_node_to_gc_snapshot((jl_value_t*)from); auto to_node_idx = _record_stack_frame_node(g_snapshot, to); - Node &from_node = g_snapshot->nodes[from_node_idx]; auto name_idx = g_snapshot->names.find_or_create_string_id("stack"); - _record_gc_just_edge("internal", from_node, to_node_idx, name_idx); + 
_record_gc_just_edge("internal", from_node_idx, to_node_idx, name_idx); } void _gc_heap_snapshot_record_frame_to_frame_edge(jl_gcframe_t *from, jl_gcframe_t *to) JL_NOTSAFEPOINT { auto from_node_idx = _record_stack_frame_node(g_snapshot, from); auto to_node_idx = _record_stack_frame_node(g_snapshot, to); - Node &from_node = g_snapshot->nodes[from_node_idx]; auto name_idx = g_snapshot->names.find_or_create_string_id("next frame"); - _record_gc_just_edge("internal", from_node, to_node_idx, name_idx); + _record_gc_just_edge("internal", from_node_idx, to_node_idx, name_idx); } void _gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT @@ -405,13 +439,10 @@ void _gc_heap_snapshot_record_module_to_binding(jl_module_t *module, jl_binding_ auto ty_idx = ty ? record_node_to_gc_snapshot(ty) : 0; auto globalref_idx = record_node_to_gc_snapshot((jl_value_t*)globalref); - auto &from_node = g_snapshot->nodes[from_node_idx]; - auto &to_node = g_snapshot->nodes[to_node_idx]; - - _record_gc_just_edge("property", from_node, to_node_idx, g_snapshot->names.find_or_create_string_id("")); - if (value_idx) _record_gc_just_edge("internal", to_node, value_idx, g_snapshot->names.find_or_create_string_id("value")); - if (ty_idx) _record_gc_just_edge("internal", to_node, ty_idx, g_snapshot->names.find_or_create_string_id("ty")); - if (globalref_idx) _record_gc_just_edge("internal", to_node, globalref_idx, g_snapshot->names.find_or_create_string_id("globalref")); + _record_gc_just_edge("property", from_node_idx, to_node_idx, g_snapshot->names.find_or_create_string_id("")); + if (value_idx) _record_gc_just_edge("internal", to_node_idx, value_idx, g_snapshot->names.find_or_create_string_id("value")); + if (ty_idx) _record_gc_just_edge("internal", to_node_idx, ty_idx, g_snapshot->names.find_or_create_string_id("ty")); + if (globalref_idx) _record_gc_just_edge("internal", to_node_idx, globalref_idx, 
g_snapshot->names.find_or_create_string_id("globalref")); } void _gc_heap_snapshot_record_internal_array_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT @@ -442,9 +473,8 @@ void _gc_heap_snapshot_record_hidden_edge(jl_value_t *from, void* to, size_t byt break; } auto to_node_idx = record_pointer_to_gc_snapshot(to, bytes, alloc_kind); - auto &from_node = g_snapshot->nodes[from_node_idx]; - _record_gc_just_edge("hidden", from_node, to_node_idx, name_or_idx); + _record_gc_just_edge("hidden", from_node_idx, to_node_idx, name_or_idx); } static inline void _record_gc_edge(const char *edge_type, jl_value_t *a, @@ -453,84 +483,46 @@ static inline void _record_gc_edge(const char *edge_type, jl_value_t *a, auto from_node_idx = record_node_to_gc_snapshot(a); auto to_node_idx = record_node_to_gc_snapshot(b); - auto &from_node = g_snapshot->nodes[from_node_idx]; - - _record_gc_just_edge(edge_type, from_node, to_node_idx, name_or_idx); + _record_gc_just_edge(edge_type, from_node_idx, to_node_idx, name_or_idx); } -void _record_gc_just_edge(const char *edge_type, Node &from_node, size_t to_idx, size_t name_or_idx) JL_NOTSAFEPOINT +void _record_gc_just_edge(const char *edge_type, size_t from_idx, size_t to_idx, size_t name_or_idx) JL_NOTSAFEPOINT { - from_node.edges.push_back(Edge{ + auto edge = Edge{ g_snapshot->edge_types.find_or_create_string_id(edge_type), name_or_idx, // edge label + from_idx, // from to_idx // to - }); + }; - g_snapshot->num_edges += 1; + serialize_edge(g_snapshot, edge); } -void serialize_heap_snapshot(ios_t *stream, HeapSnapshot &snapshot, char all_one) +void final_serialize_heap_snapshot(ios_t *json, ios_t *strings, HeapSnapshot &snapshot, char all_one) { // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L2567-L2567 - ios_printf(stream, "{\"snapshot\":{"); - ios_printf(stream, "\"meta\":{"); - ios_printf(stream, 
"\"node_fields\":[\"type\",\"name\",\"id\",\"self_size\",\"edge_count\",\"trace_node_id\",\"detachedness\"],"); - ios_printf(stream, "\"node_types\":["); - snapshot.node_types.print_json_array(stream, false); - ios_printf(stream, ","); - ios_printf(stream, "\"string\", \"number\", \"number\", \"number\", \"number\", \"number\"],"); - ios_printf(stream, "\"edge_fields\":[\"type\",\"name_or_index\",\"to_node\"],"); - ios_printf(stream, "\"edge_types\":["); - snapshot.edge_types.print_json_array(stream, false); - ios_printf(stream, ","); - ios_printf(stream, "\"string_or_number\",\"from_node\"]"); - ios_printf(stream, "},\n"); // end "meta" - ios_printf(stream, "\"node_count\":%zu,", snapshot.nodes.size()); - ios_printf(stream, "\"edge_count\":%zu", snapshot.num_edges); - ios_printf(stream, "},\n"); // end "snapshot" - - ios_printf(stream, "\"nodes\":["); - bool first_node = true; - for (const auto &from_node : snapshot.nodes) { - if (first_node) { - first_node = false; - } - else { - ios_printf(stream, ","); - } - // ["type","name","id","self_size","edge_count","trace_node_id","detachedness"] - ios_printf(stream, "%zu,%zu,%zu,%zu,%zu,%zu,%d\n", - from_node.type, - from_node.name, - from_node.id, - all_one ? 
(size_t)1 : from_node.self_size, - from_node.edges.size(), - from_node.trace_node_id, - from_node.detachedness); - } - ios_printf(stream, "],\n"); - - ios_printf(stream, "\"edges\":["); - bool first_edge = true; - for (const auto &from_node : snapshot.nodes) { - for (const auto &edge : from_node.edges) { - if (first_edge) { - first_edge = false; - } - else { - ios_printf(stream, ","); - } - ios_printf(stream, "%zu,%zu,%zu\n", - edge.type, - edge.name_or_index, - edge.to_node * k_node_number_of_fields); - } - } - ios_printf(stream, "],\n"); // end "edges" - - ios_printf(stream, "\"strings\":"); - - snapshot.names.print_json_array(stream, true); + ios_printf(json, "{\"snapshot\":{"); + ios_printf(json, "\"meta\":{"); + ios_printf(json, "\"node_fields\":[\"type\",\"name\",\"id\",\"self_size\",\"edge_count\",\"trace_node_id\",\"detachedness\"],"); + ios_printf(json, "\"node_types\":["); + snapshot.node_types.print_json_array(json, false); + ios_printf(json, ","); + ios_printf(json, "\"string\", \"number\", \"number\", \"number\", \"number\", \"number\"],"); + ios_printf(json, "\"edge_fields\":[\"type\",\"name_or_index\",\"to_node\"],"); + ios_printf(json, "\"edge_types\":["); + snapshot.edge_types.print_json_array(json, false); + ios_printf(json, ","); + ios_printf(json, "\"string_or_number\",\"from_node\"]"); + ios_printf(json, "},\n"); // end "meta" + ios_printf(json, "\"node_count\":%zu,", snapshot.num_nodes); + ios_printf(json, "\"edge_count\":%zu", snapshot.num_edges); + ios_printf(json, "}\n"); // end "snapshot" + ios_printf(json, "}"); + + ios_printf(strings, "{\n"); + ios_printf(strings, "\"strings\":"); + + snapshot.names.print_json_array(strings, true); + ios_printf(strings, "}"); - ios_printf(stream, "}"); } diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h index 8c3af5b86bec7..fdba043c0fef1 100644 --- a/src/gc-heap-snapshot.h +++ b/src/gc-heap-snapshot.h @@ -97,7 +97,8 @@ static inline void gc_heap_snapshot_record_hidden_edge(jl_value_t *from, 
void* t // --------------------------------------------------------------------- // Functions to call from Julia to take heap snapshot // --------------------------------------------------------------------- -JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream, char all_one); +JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *nodes, ios_t *edges, + ios_t *strings, ios_t *json, char all_one); #ifdef __cplusplus diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl index c37cdd0af0368..c8ad7a6971382 100644 --- a/stdlib/Profile/src/Profile.jl +++ b/stdlib/Profile/src/Profile.jl @@ -1227,8 +1227,34 @@ If `all_one` is true, then report the size of every object as one so they can be counted. Otherwise, report the actual size. """ function take_heap_snapshot(io::IOStream, all_one::Bool=false) - Base.@_lock_ios(io, ccall(:jl_gc_take_heap_snapshot, Cvoid, (Ptr{Cvoid}, Cchar), io.handle, Cchar(all_one))) + Base.@_lock_ios(io, ccall(:jl_gc_take_heap_snapshot_streaming, Cvoid, (Ptr{Cvoid}, Cchar), io.handle, Cchar(all_one))) end +function stream_snapshot() + now = Dates.now() + open("./$now.nodes", "w") do nodes + open("./$now.edges", "w") do edges + open("./$now.strings", "w") do strings + open("./$now.json", "w") do json + @Base._lock_ios(nodes, + @Base._lock_ios(edges, + @Base._lock_ios(strings, + @Base._lock_ios(json, + ccall(:jl_gc_take_heap_snapshot_streaming, + Cvoid, + (Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid}, Cchar), + nodes.handle, edges.handle, strings.handle, json.handle, + Cchar(0)) + ) + ) + ) + ) + end + end + end + end + println("Recorded heap snapshot: $now") +end + function take_heap_snapshot(filepath::String, all_one::Bool=false) open(filepath, "w") do io take_heap_snapshot(io, all_one) From 7b86d3bf3df325fc35955ae55473f4b37d2866e0 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Fri, 29 Sep 2023 13:13:46 -0600 Subject: [PATCH 02/13] Allow custom filename; use original API --- stdlib/Profile/src/Profile.jl | 36 
+++++++++++++---------------------- 1 file changed, 13 insertions(+), 23 deletions(-) diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl index c8ad7a6971382..e31a05f6ad583 100644 --- a/stdlib/Profile/src/Profile.jl +++ b/stdlib/Profile/src/Profile.jl @@ -1213,7 +1213,6 @@ end """ - Profile.take_heap_snapshot(io::IOStream, all_one::Bool=false) Profile.take_heap_snapshot(filepath::String, all_one::Bool=false) Profile.take_heap_snapshot(all_one::Bool=false; dir::String) @@ -1226,24 +1225,21 @@ full file path, or IO stream. If `all_one` is true, then report the size of every object as one so they can be easily counted. Otherwise, report the actual size. """ -function take_heap_snapshot(io::IOStream, all_one::Bool=false) - Base.@_lock_ios(io, ccall(:jl_gc_take_heap_snapshot_streaming, Cvoid, (Ptr{Cvoid}, Cchar), io.handle, Cchar(all_one))) -end -function stream_snapshot() - now = Dates.now() - open("./$now.nodes", "w") do nodes - open("./$now.edges", "w") do edges - open("./$now.strings", "w") do strings - open("./$now.json", "w") do json - @Base._lock_ios(nodes, - @Base._lock_ios(edges, - @Base._lock_ios(strings, - @Base._lock_ios(json, - ccall(:jl_gc_take_heap_snapshot_streaming, +function take_heap_snapshot(filepath::String, all_one::Bool=false) + name = filepath + open("$name.nodes", "w") do nodes + open("$name.edges", "w") do edges + open("$name.strings", "w") do strings + open("$name.json", "w") do json + Base.@_lock_ios(nodes, + Base.@_lock_ios(edges, + Base.@_lock_ios(strings, + Base.@_lock_ios(json, + ccall(:jl_gc_take_heap_snapshot, Cvoid, (Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid}, Cchar), nodes.handle, edges.handle, strings.handle, json.handle, - Cchar(0)) + Cchar(all_one)) ) ) ) @@ -1252,13 +1248,7 @@ function stream_snapshot() end end end - println("Recorded heap snapshot: $now") -end - -function take_heap_snapshot(filepath::String, all_one::Bool=false) - open(filepath, "w") do io - take_heap_snapshot(io, all_one) - end + 
println("Recorded heap snapshot: $name") return filepath end function take_heap_snapshot(all_one::Bool=false; dir::Union{Nothing,S}=nothing) where {S <: AbstractString} From 726f9e1f86804dcf2ba1ea308af8b78fea39f03e Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Fri, 29 Sep 2023 14:15:03 -0600 Subject: [PATCH 03/13] Support legacy heap snapshot interface. Add reassembly function. --- stdlib/Profile/src/Profile.jl | 47 +++-- stdlib/Profile/src/heapsnapshot_reassemble.jl | 161 ++++++++++++++++++ 2 files changed, 198 insertions(+), 10 deletions(-) create mode 100644 stdlib/Profile/src/heapsnapshot_reassemble.jl diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl index e31a05f6ad583..1ac596475f821 100644 --- a/stdlib/Profile/src/Profile.jl +++ b/stdlib/Profile/src/Profile.jl @@ -1213,8 +1213,8 @@ end """ - Profile.take_heap_snapshot(filepath::String, all_one::Bool=false) - Profile.take_heap_snapshot(all_one::Bool=false; dir::String) + Profile.take_heap_snapshot(filepath::String, all_one::Bool=false, streaming=false) + Profile.take_heap_snapshot(all_one::Bool=false; dir::String, streaming=false) Write a snapshot of the heap, in the JSON format expected by the Chrome Devtools Heap Snapshot viewer (.heapsnapshot extension) to a file @@ -1224,13 +1224,41 @@ full file path, or IO stream. If `all_one` is true, then report the size of every object as one so they can be easily counted. Otherwise, report the actual size. + +If `streaming` is true, we will stream the snapshot data out into four files, using filepath +as the prefix, to avoid having to hold the entire snapshot in memory. This option should be +used for any setting where your memory is constrained. These files can then be reassembled +by calling [`Profile.HeapSnapshot.assemble_snapshot(filepath; out_file)`](@ref), which can +be done offline. 
""" -function take_heap_snapshot(filepath::String, all_one::Bool=false) - name = filepath - open("$name.nodes", "w") do nodes - open("$name.edges", "w") do edges - open("$name.strings", "w") do strings - open("$name.json", "w") do json +function take_heap_snapshot(filepath::AbstractString, all_one::Bool=false; streaming::Bool=false) + if streaming + _stream_heap_snapshot(filepath, all_one) + println("Finished streaming heap snapshot parts to prefix: $filepath") + else + # Support the legacy, non-streaming mode, by first streaming the parts to a tempdir, + # then reassembling it after we're done. + dir = tempdir() + prefix = joinpath(dir, "snapshot") + _stream_heap_snapshot(prefix, all_one) + Profile.HeapSnapshot.assemble_snapshot(prefix, filepath) + println("Recorded heap snapshot: $filepath") + end + return filepath +end +function take_heap_snapshot(io::IO, all_one::Bool=false) + # Support the legacy, non-streaming mode, by first streaming the parts to a tempdir, + # then reassembling it after we're done. 
+ dir = tempdir() + prefix = joinpath(dir, "snapshot") + _stream_heap_snapshot(prefix, all_one) + Profile.HeapSnapshot.assemble_snapshot(prefix, io) +end +function _stream_heap_snapshot(prefix::AbstractString, all_one::Bool) + open("$prefix.nodes", "w") do nodes + open("$prefix.edges", "w") do edges + open("$prefix.strings", "w") do strings + open("$prefix.json", "w") do json Base.@_lock_ios(nodes, Base.@_lock_ios(edges, Base.@_lock_ios(strings, @@ -1248,8 +1276,6 @@ function take_heap_snapshot(filepath::String, all_one::Bool=false) end end end - println("Recorded heap snapshot: $name") - return filepath end function take_heap_snapshot(all_one::Bool=false; dir::Union{Nothing,S}=nothing) where {S <: AbstractString} fname = "$(getpid())_$(time_ns()).heapsnapshot" @@ -1271,6 +1297,7 @@ end include("Allocs.jl") +include("heapsnapshot_reassemble.jl") include("precompile.jl") end # module diff --git a/stdlib/Profile/src/heapsnapshot_reassemble.jl b/stdlib/Profile/src/heapsnapshot_reassemble.jl new file mode 100644 index 0000000000000..98c6433375c1d --- /dev/null +++ b/stdlib/Profile/src/heapsnapshot_reassemble.jl @@ -0,0 +1,161 @@ +# TODO(PR): This code hasn't been reviewed yet. 
+ +module HeapSnapshot + +# SoA layout to reduce padding +struct Edges + type::Vector{Int8} # index into `snapshot.meta.edge_types` + name_index::Vector{UInt} # index into `snapshot.strings` + to_pos::Vector{UInt32} # index into `snapshot.nodes` +end +function init_edges(n::Int) + Edges( + Vector{Int8}(undef, n), + Vector{UInt}(undef, n), + Vector{UInt32}(undef, n), + ) +end +Base.length(n::Edges) = length(n.type) + +# trace_node_id and detachedness are always 0 in the snapshots Julia produces so we don't store them +struct Nodes + type::Vector{Int8} # index in index into `snapshot.meta.node_types` + name_index::Vector{UInt32} # index in `snapshot.strings` + id::Vector{UInt} # unique id, in julia it is the address of the object + self_size::Vector{Int} # size of the object itself, not including the size of its fields + edge_count::Vector{UInt32} # number of outgoing edges + edges::Edges # outgoing edges +end +function init_nodes(n::Int, e::Int) + Nodes( + Vector{Int8}(undef, n), + Vector{UInt32}(undef, n), + Vector{UInt}(undef, n), + Vector{Int}(undef, n), + Vector{UInt32}(undef, n), + init_edges(e), + ) +end +Base.length(n::Nodes) = length(n.type) + +# Like Base.dec, but doesn't allocate a string and writes directly to the io object +# We know all of the numbers we're about to write fit into a UInt and are non-negative +let _digits_buf = zeros(UInt8, ndigits(typemax(UInt))), + _dec_d100 = UInt16[(0x30 + i % 10) << 0x8 + (0x30 + i ÷ 10) for i = 0:99] + global _write_decimal_number + _write_decimal_number(io, x::Integer, a=_digits_buf) = _write_decimal_number(io, unsigned(x), a) + function _write_decimal_number(io, x::Unsigned, a=_digits_buf) + n = ndigits(x) + i = n + @inbounds while i >= 2 + d, r = divrem(x, 0x64) + d100 = _dec_d100[(r % Int)::Int + 1] + a[i-1] = d100 % UInt8 + a[i] = (d100 >> 0x8) % UInt8 + x = oftype(x, d) + i -= 2 + end + if i > 0 + @inbounds a[i] = 0x30 + (rem(x, 0xa) % UInt8)::UInt8 + end + write(io, @view a[max(i, 1):n]) + end +end + 
+function assemble_snapshot(in_prefix, out_file::AbstractString) + open(out_file, "w") do io + assemble_snapshot(in_prefix, io) + end +end +function assemble_snapshot(in_prefix, io::IO) + preamble = read(string(in_prefix, ".json"), String) + pos = last(findfirst("node_count\":", preamble)) + 1 + endpos = findnext(==(','), preamble, pos) - 1 + node_count = parse(Int, String(@view preamble[pos:endpos])) + + pos = last(findnext("edge_count\":", preamble, endpos)) + 1 + endpos = findnext(==('}'), preamble, pos) - 1 + edge_count = parse(Int, String(@view preamble[pos:endpos])) + + nodes = init_nodes(node_count, edge_count) + + # Parse nodes with empty edge counts that we need to fill later + # TODO: preallocate line buffer + for (i, line) in enumerate(eachline(string(in_prefix, ".nodes"))) + iter = eachsplit(line, ',') + x, s = iterate(iter) + node_type = parse(Int8, x) + x, s = iterate(iter, s) + node_name_index = parse(UInt32, x) + x, s = iterate(iter, s) + id = parse(UInt, x) + x, s = iterate(iter, s) + self_size = parse(Int, x) + x, s = iterate(iter, s) + edge_count = parse(UInt, x) + @assert edge_count == 0 + x, s = iterate(iter, s) + @assert parse(Int8, x) == 0 # trace_node_id + x, s = iterate(iter, s) + @assert parse(Int8, x) == 0 # detachedness + + nodes.type[i] = node_type + nodes.name_index[i] = node_name_index + nodes.id[i] = id + nodes.self_size[i] = self_size + nodes.edge_count[i] = edge_count + end + + # Parse the edges to fill in the edge counts for nodes and correct the to_node offsets + # TODO: preallocate line buffer + for (i, line) in enumerate(eachline(string(in_prefix, ".edges"))) + iter = eachsplit(line, ',') + x, s = iterate(iter) + edge_type = parse(Int8, x) + x, s = iterate(iter, s) + edge_name_index = parse(UInt, x) + x, s = iterate(iter, s) + to_node = parse(UInt32, x) + x, s = iterate(iter, s) + from_node = parse(Int, x) + + nodes.edges.type[i] = edge_type + nodes.edges.name_index[i] = edge_name_index + nodes.edges.to_pos[i] = to_node * 7 # 7 
fields per node, the streaming format doesn't multiply the offset by 7 + nodes.edge_count[from_node] += UInt32(1) + end + + _digits_buf = zeros(UInt8, ndigits(typemax(UInt))) + println(io, @view(preamble[1:end-2]), ",") # remove trailing "}\n", we don't end the snapshot here + println(io, "\"nodes\":[") + for i in 1:length(nodes) + i > 1 && println(io, ",") + _write_decimal_number(io, nodes.type[i], _digits_buf) + print(io, ",") + _write_decimal_number(io, nodes.name_index[i], _digits_buf) + print(io, ",") + _write_decimal_number(io, nodes.id[i], _digits_buf) + print(io, ",") + _write_decimal_number(io, nodes.self_size[i], _digits_buf) + print(io, ",") + _write_decimal_number(io, nodes.edge_count[i], _digits_buf) + print(io, ",0,0") + end + println(io, "],\"edges\":[") + for i in 1:length(nodes.edges) + i > 1 && println(io, ",") + _write_decimal_number(io, nodes.edges.type[i], _digits_buf) + print(io, ",") + _write_decimal_number(io, nodes.edges.name_index[i], _digits_buf) + print(io, ",") + _write_decimal_number(io, nodes.edges.to_pos[i], _digits_buf) + end + open(string(in_prefix, ".strings"), "r") do strings_io + skip(strings_io, 2) # skip "{\n" + println(io, "],") + write(io, strings_io) # strings contain the trailing "}" so we close out what we opened in preamble + end + return nothing +end + +end From 4ee78181b671dbe6437e304843b1fc008d447a89 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Fri, 29 Sep 2023 14:46:24 -0600 Subject: [PATCH 04/13] Add tests --- stdlib/Profile/src/heapsnapshot_reassemble.jl | 18 ++++++++++-------- .../Profile/test/heapsnapshot_reassemble.jl | 19 +++++++++++++++++++ stdlib/Profile/test/runtests.jl | 1 + 3 files changed, 30 insertions(+), 8 deletions(-) create mode 100644 stdlib/Profile/test/heapsnapshot_reassemble.jl diff --git a/stdlib/Profile/src/heapsnapshot_reassemble.jl b/stdlib/Profile/src/heapsnapshot_reassemble.jl index 98c6433375c1d..1e58be2f87141 100644 --- a/stdlib/Profile/src/heapsnapshot_reassemble.jl +++ 
b/stdlib/Profile/src/heapsnapshot_reassemble.jl @@ -40,25 +40,25 @@ Base.length(n::Nodes) = length(n.type) # Like Base.dec, but doesn't allocate a string and writes directly to the io object # We know all of the numbers we're about to write fit into a UInt and are non-negative -let _digits_buf = zeros(UInt8, ndigits(typemax(UInt))), - _dec_d100 = UInt16[(0x30 + i % 10) << 0x8 + (0x30 + i ÷ 10) for i = 0:99] +let _dec_d100 = UInt16[(0x30 + i % 10) << 0x8 + (0x30 + i ÷ 10) for i = 0:99] global _write_decimal_number - _write_decimal_number(io, x::Integer, a=_digits_buf) = _write_decimal_number(io, unsigned(x), a) - function _write_decimal_number(io, x::Unsigned, a=_digits_buf) + _write_decimal_number(io, x::Integer, buf) = _write_decimal_number(io, unsigned(x), buf) + function _write_decimal_number(io, x::Unsigned, digits_buf) + buf = digits_buf n = ndigits(x) i = n @inbounds while i >= 2 d, r = divrem(x, 0x64) d100 = _dec_d100[(r % Int)::Int + 1] - a[i-1] = d100 % UInt8 - a[i] = (d100 >> 0x8) % UInt8 + buf[i-1] = d100 % UInt8 + buf[i] = (d100 >> 0x8) % UInt8 x = oftype(x, d) i -= 2 end if i > 0 - @inbounds a[i] = 0x30 + (rem(x, 0xa) % UInt8)::UInt8 + @inbounds buf[i] = 0x30 + (rem(x, 0xa) % UInt8)::UInt8 end - write(io, @view a[max(i, 1):n]) + write(io, @view buf[max(i, 1):n]) end end @@ -67,6 +67,8 @@ function assemble_snapshot(in_prefix, out_file::AbstractString) assemble_snapshot(in_prefix, io) end end +# Manually parse and write the .json files, given that we don't have JSON import/export in +# julia's stdlibs. 
function assemble_snapshot(in_prefix, io::IO) preamble = read(string(in_prefix, ".json"), String) pos = last(findfirst("node_count\":", preamble)) + 1 diff --git a/stdlib/Profile/test/heapsnapshot_reassemble.jl b/stdlib/Profile/test/heapsnapshot_reassemble.jl new file mode 100644 index 0000000000000..9277848a5ad89 --- /dev/null +++ b/stdlib/Profile/test/heapsnapshot_reassemble.jl @@ -0,0 +1,19 @@ +using Test + +@testset "_write_decimal_number" begin + _digits_buf = zeros(UInt8, ndigits(typemax(UInt))) + io = IOBuffer() + + test_write(d) = begin + Profile.HeapSnapshot._write_decimal_number(io, d, _digits_buf) + s = String(take!(io)) + seekstart(io) + return s + end + @test test_write(0) == "0" + @test test_write(99) == "99" + # Sample among possible UInts we might print + for x in typemin(UInt):typemax(UInt)÷10001:typemax(UInt) + @test test_write(x) == string(x) + end +end diff --git a/stdlib/Profile/test/runtests.jl b/stdlib/Profile/test/runtests.jl index 95ec7f857dad7..12d0e2e433369 100644 --- a/stdlib/Profile/test/runtests.jl +++ b/stdlib/Profile/test/runtests.jl @@ -295,3 +295,4 @@ end end include("allocs.jl") +include("heapsnapshot_reassemble.jl") From 78dcd8fc59191df1aab149cbdbb9ab7298c2e725 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Fri, 29 Sep 2023 14:49:53 -0600 Subject: [PATCH 05/13] Apply suggestions from code review --- stdlib/Profile/src/heapsnapshot_reassemble.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stdlib/Profile/src/heapsnapshot_reassemble.jl b/stdlib/Profile/src/heapsnapshot_reassemble.jl index 1e58be2f87141..4dc210521ad77 100644 --- a/stdlib/Profile/src/heapsnapshot_reassemble.jl +++ b/stdlib/Profile/src/heapsnapshot_reassemble.jl @@ -19,8 +19,8 @@ Base.length(n::Edges) = length(n.type) # trace_node_id and detachedness are always 0 in the snapshots Julia produces so we don't store them struct Nodes - type::Vector{Int8} # index in index into `snapshot.meta.node_types` - name_index::Vector{UInt32} # index in 
`snapshot.strings` + type::Vector{Int8} # index into `snapshot.meta.node_types` + name_index::Vector{UInt32} # index into `snapshot.strings` id::Vector{UInt} # unique id, in julia it is the address of the object self_size::Vector{Int} # size of the object itself, not including the size of its fields edge_count::Vector{UInt32} # number of outgoing edges From af63e6bae8bb90956db40005f217d92fc956c84d Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Fri, 29 Sep 2023 14:50:04 -0600 Subject: [PATCH 06/13] Update src/gc-heap-snapshot.cpp --- src/gc-heap-snapshot.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 6b6a4e3138ece..758e3d0053843 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -55,7 +55,7 @@ void print_str_escape_json(ios_t *stream, StringRef s) struct Edge { size_t type; // These *must* match the Enums on the JS side; control interpretation of name_or_index. size_t name_or_index; // name of the field (for objects/modules) or index of array - size_t from_node; + size_t from_node; // This is a deviation from the .heapsnapshot format to support streaming. size_t to_node; }; From e7254119b15fc2f6d5568b4691dd06631827378d Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Tue, 3 Oct 2023 15:33:46 -0600 Subject: [PATCH 07/13] Change to always save the parts in the same directory This way you can always recover from an OOM --- stdlib/Profile/src/Profile.jl | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl index 1ac596475f821..1478c4bb64f40 100644 --- a/stdlib/Profile/src/Profile.jl +++ b/stdlib/Profile/src/Profile.jl @@ -1230,16 +1230,24 @@ as the prefix, to avoid having to hold the entire snapshot in memory. This optio used for any setting where your memory is constrained. 
These files can then be reassembled by calling [`Profile.HeapSnapshot.assemble_snapshot(filepath; out_file)`](@ref), which can be done offline. + +NOTE: We strongly recommend setting streaming=true for performance reasons. Reconstructing +the snapshot from the parts requires holding the entire snapshot in memory, so if the +snapshot is large, you can run out of memory while processing it. Streaming allows you to +reconstruct the snapshot offline, after your workload is done running. +If you do attempt to collect a snapshot with streaming=false (the default, for +backwards-compatibility) and your process is killed, note that this will always save the +parts in the same directory as your provided filepath, so you can still reconstruct the +snapshot after the fact, via `assemble_snapshot()`. """ function take_heap_snapshot(filepath::AbstractString, all_one::Bool=false; streaming::Bool=false) if streaming _stream_heap_snapshot(filepath, all_one) println("Finished streaming heap snapshot parts to prefix: $filepath") else - # Support the legacy, non-streaming mode, by first streaming the parts to a tempdir, - # then reassembling it after we're done. - dir = tempdir() - prefix = joinpath(dir, "snapshot") + # Support the legacy, non-streaming mode, by first streaming the parts, then + # reassembling it after we're done. 
+ prefix = filepath _stream_heap_snapshot(prefix, all_one) Profile.HeapSnapshot.assemble_snapshot(prefix, filepath) println("Recorded heap snapshot: $filepath") From 476eef09bc7d265b91af26f8e76547f68b4ca166 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Tue, 3 Oct 2023 15:48:31 -0600 Subject: [PATCH 08/13] Fix bug in reassembler: from_node and to_node were in the wrong order --- stdlib/Profile/src/Profile.jl | 2 +- stdlib/Profile/src/heapsnapshot_reassemble.jl | 24 +++++++++---------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl index 1478c4bb64f40..bb3cecc994b83 100644 --- a/stdlib/Profile/src/Profile.jl +++ b/stdlib/Profile/src/Profile.jl @@ -1228,7 +1228,7 @@ counted. Otherwise, report the actual size. If `streaming` is true, we will stream the snapshot data out into four files, using filepath as the prefix, to avoid having to hold the entire snapshot in memory. This option should be used for any setting where your memory is constrained. These files can then be reassembled -by calling [`Profile.HeapSnapshot.assemble_snapshot(filepath; out_file)`](@ref), which can +by calling [`Profile.HeapSnapshot.assemble_snapshot(filepath)`](@ref), which can be done offline. NOTE: We strongly recommend setting streaming=true for performance reasons. 
Reconstructing diff --git a/stdlib/Profile/src/heapsnapshot_reassemble.jl b/stdlib/Profile/src/heapsnapshot_reassemble.jl index 4dc210521ad77..008de9e305453 100644 --- a/stdlib/Profile/src/heapsnapshot_reassemble.jl +++ b/stdlib/Profile/src/heapsnapshot_reassemble.jl @@ -5,7 +5,7 @@ module HeapSnapshot # SoA layout to reduce padding struct Edges type::Vector{Int8} # index into `snapshot.meta.edge_types` - name_index::Vector{UInt} # index into `snapshot.strings` + name_or_index::Vector{UInt} # Either an index into `snapshot.strings`, or the index in an array, depending on edge_type to_pos::Vector{UInt32} # index into `snapshot.nodes` end function init_edges(n::Int) @@ -20,7 +20,7 @@ Base.length(n::Edges) = length(n.type) # trace_node_id and detachedness are always 0 in the snapshots Julia produces so we don't store them struct Nodes type::Vector{Int8} # index into `snapshot.meta.node_types` - name_index::Vector{UInt32} # index into `snapshot.strings` + name_idx::Vector{UInt32} # index into `snapshot.strings` id::Vector{UInt} # unique id, in julia it is the address of the object self_size::Vector{Int} # size of the object itself, not including the size of its fields edge_count::Vector{UInt32} # number of outgoing edges @@ -62,7 +62,7 @@ let _dec_d100 = UInt16[(0x30 + i % 10) << 0x8 + (0x30 + i ÷ 10) for i = 0:99] end end -function assemble_snapshot(in_prefix, out_file::AbstractString) +function assemble_snapshot(in_prefix, out_file::AbstractString = in_prefix) open(out_file, "w") do io assemble_snapshot(in_prefix, io) end @@ -88,7 +88,7 @@ function assemble_snapshot(in_prefix, io::IO) x, s = iterate(iter) node_type = parse(Int8, x) x, s = iterate(iter, s) - node_name_index = parse(UInt32, x) + node_name_idx = parse(UInt32, x) x, s = iterate(iter, s) id = parse(UInt, x) x, s = iterate(iter, s) @@ -102,7 +102,7 @@ function assemble_snapshot(in_prefix, io::IO) @assert parse(Int8, x) == 0 # detachedness nodes.type[i] = node_type - nodes.name_index[i] = node_name_index + 
nodes.name_idx[i] = node_name_idx nodes.id[i] = id nodes.self_size[i] = self_size nodes.edge_count[i] = edge_count @@ -115,16 +115,16 @@ function assemble_snapshot(in_prefix, io::IO) x, s = iterate(iter) edge_type = parse(Int8, x) x, s = iterate(iter, s) - edge_name_index = parse(UInt, x) - x, s = iterate(iter, s) - to_node = parse(UInt32, x) + edge_name_or_index = parse(UInt, x) x, s = iterate(iter, s) from_node = parse(Int, x) + x, s = iterate(iter, s) + to_node = parse(UInt32, x) nodes.edges.type[i] = edge_type - nodes.edges.name_index[i] = edge_name_index + nodes.edges.name_or_index[i] = edge_name_or_index nodes.edges.to_pos[i] = to_node * 7 # 7 fields per node, the streaming format doesn't multiply the offset by 7 - nodes.edge_count[from_node] += UInt32(1) + nodes.edge_count[from_node + 1] += UInt32(1) # C and JSON use 0-based indexing end _digits_buf = zeros(UInt8, ndigits(typemax(UInt))) @@ -134,7 +134,7 @@ function assemble_snapshot(in_prefix, io::IO) i > 1 && println(io, ",") _write_decimal_number(io, nodes.type[i], _digits_buf) print(io, ",") - _write_decimal_number(io, nodes.name_index[i], _digits_buf) + _write_decimal_number(io, nodes.name_idx[i], _digits_buf) print(io, ",") _write_decimal_number(io, nodes.id[i], _digits_buf) print(io, ",") @@ -148,7 +148,7 @@ function assemble_snapshot(in_prefix, io::IO) i > 1 && println(io, ",") _write_decimal_number(io, nodes.edges.type[i], _digits_buf) print(io, ",") - _write_decimal_number(io, nodes.edges.name_index[i], _digits_buf) + _write_decimal_number(io, nodes.edges.name_or_index[i], _digits_buf) print(io, ",") _write_decimal_number(io, nodes.edges.to_pos[i], _digits_buf) end From 607b676cbfbde3f8f92626059b3224435eac02e4 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Tue, 3 Oct 2023 16:01:54 -0600 Subject: [PATCH 09/13] Fix correctness mistake: The edges have to be reordered according to the node order. That's the whole reason this is tricky. 
But I'm not sure now whether the SoAs approach is actually an optimization... It seems like we should probably prefer to inline the Edges right into the vector, rather than having to do another random lookup into the edges table? --- stdlib/Profile/src/heapsnapshot_reassemble.jl | 56 ++++++++++++++++--- .../Profile/test/heapsnapshot_reassemble.jl | 12 ++++ 2 files changed, 59 insertions(+), 9 deletions(-) diff --git a/stdlib/Profile/src/heapsnapshot_reassemble.jl b/stdlib/Profile/src/heapsnapshot_reassemble.jl index 008de9e305453..2ebbb5c9ab3a1 100644 --- a/stdlib/Profile/src/heapsnapshot_reassemble.jl +++ b/stdlib/Profile/src/heapsnapshot_reassemble.jl @@ -25,6 +25,12 @@ struct Nodes self_size::Vector{Int} # size of the object itself, not including the size of its fields edge_count::Vector{UInt32} # number of outgoing edges edges::Edges # outgoing edges + # This is the main complexity of the .heapsnapshot format, and it's the reason we need + # to read in all the data before writing it out. The edges vector contains all edges, + # but organized by which node they came from. First, it contains all the edges coming + # out of node 0, then all edges leaving node 1, etc. So we need to have visited all + # edges, and assigned them to their corresponding nodes, before we can emit the file. 
+ edge_idxs::Vector{Vector{UInt}} # indexes into edges, keeping per-node outgoing edge ids end function init_nodes(n::Int, e::Int) Nodes( @@ -34,6 +40,7 @@ function init_nodes(n::Int, e::Int) Vector{Int}(undef, n), Vector{UInt32}(undef, n), init_edges(e), + [Vector{UInt}() for _ in 1:n], # Take care to construct n separate empty vectors ) end Base.length(n::Nodes) = length(n.type) @@ -81,9 +88,19 @@ function assemble_snapshot(in_prefix, io::IO) nodes = init_nodes(node_count, edge_count) + shouldlog(i) = false + # N = 100000 + # shouldlog(i) = i % N == 0 + # N *= 10 + # return true + # else + # return false + # end + # Parse nodes with empty edge counts that we need to fill later # TODO: preallocate line buffer for (i, line) in enumerate(eachline(string(in_prefix, ".nodes"))) + shouldlog(i) && println("Parsing node $i") iter = eachsplit(line, ',') x, s = iterate(iter) node_type = parse(Int8, x) @@ -106,6 +123,15 @@ function assemble_snapshot(in_prefix, io::IO) nodes.id[i] = id nodes.self_size[i] = self_size nodes.edge_count[i] = edge_count + + shouldlog(i) && begin + println("Parsed node $i") + @show node_type + @show node_name_idx + @show id + @show self_size + @show edge_count + end end # Parse the edges to fill in the edge counts for nodes and correct the to_node offsets @@ -125,6 +151,7 @@ function assemble_snapshot(in_prefix, io::IO) nodes.edges.name_or_index[i] = edge_name_or_index nodes.edges.to_pos[i] = to_node * 7 # 7 fields per node, the streaming format doesn't multiply the offset by 7 nodes.edge_count[from_node + 1] += UInt32(1) # C and JSON use 0-based indexing + push!(nodes.edge_idxs[from_node + 1], i) # Index into nodes.edges end _digits_buf = zeros(UInt8, ndigits(typemax(UInt))) @@ -143,18 +170,29 @@ function assemble_snapshot(in_prefix, io::IO) _write_decimal_number(io, nodes.edge_count[i], _digits_buf) print(io, ",0,0") end - println(io, "],\"edges\":[") - for i in 1:length(nodes.edges) - i > 1 && println(io, ",") - _write_decimal_number(io, 
nodes.edges.type[i], _digits_buf) - print(io, ",") - _write_decimal_number(io, nodes.edges.name_or_index[i], _digits_buf) - print(io, ",") - _write_decimal_number(io, nodes.edges.to_pos[i], _digits_buf) + print(io, "],\"edges\":[") + e = 1 + for n in 1:length(nodes) + count = nodes.edge_count[n] + len_edges = length(nodes.edge_idxs[n]) + @assert count == len_edges "For node $n: $count != $len_edges" + for i in nodes.edge_idxs[n] + e > 1 && print(io, ",") + println(io) + _write_decimal_number(io, nodes.edges.type[i], _digits_buf) + print(io, ",") + _write_decimal_number(io, nodes.edges.name_or_index[i], _digits_buf) + print(io, ",") + _write_decimal_number(io, nodes.edges.to_pos[i], _digits_buf) + if !(nodes.edges.to_pos[i] % 7 == 0) + @warn "Bug in to_pos for edge $i from node $n: $(nodes.edges.to_pos[i])" + end + e += 1 + end end + println(io, "],") open(string(in_prefix, ".strings"), "r") do strings_io skip(strings_io, 2) # skip "{\n" - println(io, "],") write(io, strings_io) # strings contain the trailing "}" so we close out what we opened in preamble end return nothing diff --git a/stdlib/Profile/test/heapsnapshot_reassemble.jl b/stdlib/Profile/test/heapsnapshot_reassemble.jl index 9277848a5ad89..bf1e6e76bce58 100644 --- a/stdlib/Profile/test/heapsnapshot_reassemble.jl +++ b/stdlib/Profile/test/heapsnapshot_reassemble.jl @@ -12,7 +12,19 @@ using Test end @test test_write(0) == "0" @test test_write(99) == "99" + + @test test_write(UInt8(0)) == "0" + @test test_write(UInt32(0)) == "0" + @test test_write(Int32(0)) == "0" + + @test test_write(UInt8(99)) == "99" + @test test_write(UInt32(99)) == "99" + @test test_write(Int32(99)) == "99" + # Sample among possible UInts we might print + for x in typemin(UInt8):typemax(UInt8) + @test test_write(x) == string(x) + end for x in typemin(UInt):typemax(UInt)÷10001:typemax(UInt) @test test_write(x) == string(x) end From e48f9d00a249c10c4026698e86bf5e33b8340e8b Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Tue, 3 Oct 2023 
16:42:43 -0600 Subject: [PATCH 10/13] Debugging messed up edge array idxs --- stdlib/Profile/src/heapsnapshot_reassemble.jl | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/stdlib/Profile/src/heapsnapshot_reassemble.jl b/stdlib/Profile/src/heapsnapshot_reassemble.jl index 2ebbb5c9ab3a1..47bd5e3c32721 100644 --- a/stdlib/Profile/src/heapsnapshot_reassemble.jl +++ b/stdlib/Profile/src/heapsnapshot_reassemble.jl @@ -90,7 +90,7 @@ function assemble_snapshot(in_prefix, io::IO) shouldlog(i) = false # N = 100000 - # shouldlog(i) = i % N == 0 + # shouldlog(i) = i % 10000 == 0 # N *= 10 # return true # else @@ -171,6 +171,7 @@ function assemble_snapshot(in_prefix, io::IO) print(io, ",0,0") end print(io, "],\"edges\":[") + shouldloge(i) = i % 10000 == 0 e = 1 for n in 1:length(nodes) count = nodes.edge_count[n] @@ -187,6 +188,10 @@ function assemble_snapshot(in_prefix, io::IO) if !(nodes.edges.to_pos[i] % 7 == 0) @warn "Bug in to_pos for edge $i from node $n: $(nodes.edges.to_pos[i])" end + shouldloge(i) && println("Edge $i: type $(nodes.edges.type[i])") + if nodes.edges.type[i] == 2 # "element" (array index) + println("Array Edge $i: index $(nodes.edges.name_or_index[i])") + end e += 1 end end From 1fa5ec254fcfaa7e63168eac3ca2a0f7e5c10f98 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Tue, 3 Oct 2023 17:33:52 -0600 Subject: [PATCH 11/13] Disable log message --- stdlib/Profile/src/heapsnapshot_reassemble.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/stdlib/Profile/src/heapsnapshot_reassemble.jl b/stdlib/Profile/src/heapsnapshot_reassemble.jl index 47bd5e3c32721..3c83dff5daa7e 100644 --- a/stdlib/Profile/src/heapsnapshot_reassemble.jl +++ b/stdlib/Profile/src/heapsnapshot_reassemble.jl @@ -188,10 +188,10 @@ function assemble_snapshot(in_prefix, io::IO) if !(nodes.edges.to_pos[i] % 7 == 0) @warn "Bug in to_pos for edge $i from node $n: $(nodes.edges.to_pos[i])" end - shouldloge(i) && println("Edge $i: type 
$(nodes.edges.type[i])") - if nodes.edges.type[i] == 2 # "element" (array index) - println("Array Edge $i: index $(nodes.edges.name_or_index[i])") - end + # shouldloge(i) && println("Edge $i: type $(nodes.edges.type[i])") + # if nodes.edges.type[i] == 2 # "element" (array index) + # println("Array Edge $i: index $(nodes.edges.name_or_index[i])") + # end e += 1 end end From 685dc426fa6f98634727e7b0e981044c7196d9ab Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Tue, 3 Oct 2023 17:47:21 -0600 Subject: [PATCH 12/13] Write the .nodes and .edges as binary data --- src/gc-heap-snapshot.cpp | 47 +++++++++------- stdlib/Profile/src/Profile.jl | 6 +- stdlib/Profile/src/heapsnapshot_reassemble.jl | 55 +++++++------------ 3 files changed, 50 insertions(+), 58 deletions(-) diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 758e3d0053843..f7bffe6c97665 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -53,7 +53,7 @@ void print_str_escape_json(ios_t *stream, StringRef s) // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L2598-L2601 struct Edge { - size_t type; // These *must* match the Enums on the JS side; control interpretation of name_or_index. + uint8_t type; // These *must* match the Enums on the JS side; control interpretation of name_or_index. size_t name_or_index; // name of the field (for objects/modules) or index of array size_t from_node; // This is a deviation from the .heapsnapshot format to support streaming. size_t to_node; @@ -66,14 +66,14 @@ struct Edge { const int k_node_number_of_fields = 7; struct Node { - size_t type; // index into snapshot->node_types + uint8_t type; // index into snapshot->node_types size_t name; size_t id; // This should be a globally-unique counter, but we use the memory address size_t self_size; size_t trace_node_id; // This is ALWAYS 0 in Javascript heap-snapshots. 
// whether the from_node is attached or dettached from the main application state // https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/include/v8-profiler.h#L739-L745 - int detachedness; // 0 - unknown, 1 - attached, 2 - detached + uint8_t detachedness; // 0 - unknown, 1 - attached, 2 - detached ~Node() JL_NOTSAFEPOINT = default; }; @@ -117,8 +117,12 @@ struct HeapSnapshot { Node internal_root; // Used for streaming + // Since nodes and edges are just one giant array of integers, we stream them as + // *BINARY DATA*: raw native-endian fields, 1 byte for the uint8_t ones (type, + // detachedness) and sizeof(size_t) bytes (big enough for pointer ids) otherwise. ios_t *nodes; ios_t *edges; + // These files are written out as json data. ios_t *strings; ios_t *json; @@ -172,25 +176,26 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *nodes, ios_t *edges, void serialize_node(HeapSnapshot *snapshot, const Node &node) { // ["type","name","id","self_size","edge_count","trace_node_id","detachedness"] - ios_printf(snapshot->nodes, "%zu,%zu,%zu,%zu,%zu,%zu,%d\n", - node.type, - node.name, - node.id, - node.self_size, - 0, // fake edge count for now - node.trace_node_id, - node.detachedness); + ios_write(snapshot->nodes, (char*)&node.type, sizeof(node.type)); + ios_write(snapshot->nodes, (char*)&node.name, sizeof(node.name)); + ios_write(snapshot->nodes, (char*)&node.id, sizeof(node.id)); + ios_write(snapshot->nodes, (char*)&node.self_size, sizeof(node.self_size)); + // NOTE: We don't write edge_count, since it's always 0. It will be reconstructed in + // post-processing. 
+ ios_write(snapshot->nodes, (char*)&node.trace_node_id, sizeof(node.trace_node_id)); + ios_write(snapshot->nodes, (char*)&node.detachedness, sizeof(node.detachedness)); g_snapshot->num_nodes += 1; } void serialize_edge(HeapSnapshot *snapshot, const Edge &edge) { // ["type","name_or_index","to_node"] - ios_printf(snapshot->edges, "%zu,%zu,%zu,%zu\n", - edge.type, - edge.name_or_index, - edge.from_node, // NOTE: Row number (not adjusted for k_node_number_of_fields) - edge.to_node); // NOTE: Row number (not adjusted for k_node_number_of_fields) + ios_write(snapshot->edges, (char*)&edge.type, sizeof(edge.type)); + ios_write(snapshot->edges, (char*)&edge.name_or_index, sizeof(edge.name_or_index)); + // NOTE: Row numbers for nodes (not adjusted for k_node_number_of_fields) + ios_write(snapshot->edges, (char*)&edge.from_node, sizeof(edge.from_node)); + ios_write(snapshot->edges, (char*)&edge.to_node, sizeof(edge.to_node)); + g_snapshot->num_edges += 1; } @@ -199,7 +204,7 @@ void serialize_edge(HeapSnapshot *snapshot, const Edge &edge) void _add_internal_root(HeapSnapshot *snapshot) { snapshot->internal_root = Node{ - snapshot->node_types.find_or_create_string_id("synthetic"), + (uint8_t)snapshot->node_types.find_or_create_string_id("synthetic"), snapshot->names.find_or_create_string_id(""), // name 0, // id 0, // size @@ -285,7 +290,7 @@ size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT } auto node = Node{ - g_snapshot->node_types.find_or_create_string_id(node_type), // size_t type; + (uint8_t)g_snapshot->node_types.find_or_create_string_id(node_type), // size_t type; g_snapshot->names.find_or_create_string_id(name), // size_t name; (size_t)a, // size_t id; // We add 1 to self-size for the type tag that all heap-allocated objects have. 
@@ -310,7 +315,7 @@ static size_t record_pointer_to_gc_snapshot(void *a, size_t bytes, StringRef nam } auto node = Node{ - g_snapshot->node_types.find_or_create_string_id( "object"), // size_t type; + (uint8_t)g_snapshot->node_types.find_or_create_string_id( "object"), // size_t type; g_snapshot->names.find_or_create_string_id(name), // size_t name; (size_t)a, // size_t id; bytes, // size_t self_size; @@ -375,7 +380,7 @@ size_t _record_stack_frame_node(HeapSnapshot *snapshot, void *frame) JL_NOTSAFEP } auto node = Node{ - snapshot->node_types.find_or_create_string_id("synthetic"), + (uint8_t)snapshot->node_types.find_or_create_string_id("synthetic"), snapshot->names.find_or_create_string_id("(stack frame)"), // name (size_t)frame, // id 1, // size @@ -489,7 +494,7 @@ static inline void _record_gc_edge(const char *edge_type, jl_value_t *a, void _record_gc_just_edge(const char *edge_type, size_t from_idx, size_t to_idx, size_t name_or_idx) JL_NOTSAFEPOINT { auto edge = Edge{ - g_snapshot->edge_types.find_or_create_string_id(edge_type), + (uint8_t)g_snapshot->edge_types.find_or_create_string_id(edge_type), name_or_idx, // edge label from_idx, // from to_idx // to diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl index bb3cecc994b83..56dfbd2ff759a 100644 --- a/stdlib/Profile/src/Profile.jl +++ b/stdlib/Profile/src/Profile.jl @@ -1263,10 +1263,12 @@ function take_heap_snapshot(io::IO, all_one::Bool=false) Profile.HeapSnapshot.assemble_snapshot(prefix, io) end function _stream_heap_snapshot(prefix::AbstractString, all_one::Bool) + # Nodes and edges are binary files open("$prefix.nodes", "w") do nodes open("$prefix.edges", "w") do edges - open("$prefix.strings", "w") do strings - open("$prefix.json", "w") do json + # The other two files are json data + open("$prefix.strings.json", "w") do strings + open("$prefix.metadata.json", "w") do json Base.@_lock_ios(nodes, Base.@_lock_ios(edges, Base.@_lock_ios(strings, diff --git 
a/stdlib/Profile/src/heapsnapshot_reassemble.jl b/stdlib/Profile/src/heapsnapshot_reassemble.jl index 3c83dff5daa7e..8ec2a2d93c1bf 100644 --- a/stdlib/Profile/src/heapsnapshot_reassemble.jl +++ b/stdlib/Profile/src/heapsnapshot_reassemble.jl @@ -6,13 +6,13 @@ module HeapSnapshot struct Edges type::Vector{Int8} # index into `snapshot.meta.edge_types` name_or_index::Vector{UInt} # Either an index into `snapshot.strings`, or the index in an array, depending on edge_type - to_pos::Vector{UInt32} # index into `snapshot.nodes` + to_pos::Vector{UInt} # index into `snapshot.nodes` end function init_edges(n::Int) Edges( Vector{Int8}(undef, n), Vector{UInt}(undef, n), - Vector{UInt32}(undef, n), + Vector{UInt}(undef, n), ) end Base.length(n::Edges) = length(n.type) @@ -23,7 +23,7 @@ struct Nodes name_idx::Vector{UInt32} # index into `snapshot.strings` id::Vector{UInt} # unique id, in julia it is the address of the object self_size::Vector{Int} # size of the object itself, not including the size of its fields - edge_count::Vector{UInt32} # number of outgoing edges + edge_count::Vector{UInt} # number of outgoing edges edges::Edges # outgoing edges # This is the main complexity of the .heapsnapshot format, and it's the reason we need # to read in all the data before writing it out. The edges vector contains all edges, @@ -77,7 +77,7 @@ end # Manually parse and write the .json files, given that we don't have JSON import/export in # julia's stdlibs. 
function assemble_snapshot(in_prefix, io::IO) - preamble = read(string(in_prefix, ".json"), String) + preamble = read(string(in_prefix, ".metadata.json"), String) pos = last(findfirst("node_count\":", preamble)) + 1 endpos = findnext(==(','), preamble, pos) - 1 node_count = parse(Int, String(@view preamble[pos:endpos])) @@ -98,31 +98,21 @@ function assemble_snapshot(in_prefix, io::IO) # end # Parse nodes with empty edge counts that we need to fill later - # TODO: preallocate line buffer - for (i, line) in enumerate(eachline(string(in_prefix, ".nodes"))) + nodes_file = open(string(in_prefix, ".nodes"), "r") + for i in 1:length(nodes) shouldlog(i) && println("Parsing node $i") - iter = eachsplit(line, ',') - x, s = iterate(iter) - node_type = parse(Int8, x) - x, s = iterate(iter, s) - node_name_idx = parse(UInt32, x) - x, s = iterate(iter, s) - id = parse(UInt, x) - x, s = iterate(iter, s) - self_size = parse(Int, x) - x, s = iterate(iter, s) - edge_count = parse(UInt, x) - @assert edge_count == 0 - x, s = iterate(iter, s) - @assert parse(Int8, x) == 0 # trace_node_id - x, s = iterate(iter, s) - @assert parse(Int8, x) == 0 # detachedness + node_type = read(nodes_file, Int8) + node_name_idx = read(nodes_file, UInt) + id = read(nodes_file, UInt) + self_size = read(nodes_file, Int) + @assert read(nodes_file, Int) == 0 # trace_node_id + @assert read(nodes_file, Int8) == 0 # detachedness nodes.type[i] = node_type nodes.name_idx[i] = node_name_idx nodes.id[i] = id nodes.self_size[i] = self_size - nodes.edge_count[i] = edge_count + nodes.edge_count[i] = 0 # edge_count shouldlog(i) && begin println("Parsed node $i") @@ -135,17 +125,12 @@ function assemble_snapshot(in_prefix, io::IO) end # Parse the edges to fill in the edge counts for nodes and correct the to_node offsets - # TODO: preallocate line buffer - for (i, line) in enumerate(eachline(string(in_prefix, ".edges"))) - iter = eachsplit(line, ',') - x, s = iterate(iter) - edge_type = parse(Int8, x) - x, s = iterate(iter, 
s) - edge_name_or_index = parse(UInt, x) - x, s = iterate(iter, s) - from_node = parse(Int, x) - x, s = iterate(iter, s) - to_node = parse(UInt32, x) + edges_file = open(string(in_prefix, ".edges"), "r") + for i in 1:length(nodes.edges) + edge_type = read(edges_file, Int8) + edge_name_or_index = read(edges_file, UInt) + from_node = read(edges_file, UInt) + to_node = read(edges_file, UInt) nodes.edges.type[i] = edge_type nodes.edges.name_or_index[i] = edge_name_or_index @@ -196,7 +181,7 @@ function assemble_snapshot(in_prefix, io::IO) end end println(io, "],") - open(string(in_prefix, ".strings"), "r") do strings_io + open(string(in_prefix, ".strings.json"), "r") do strings_io skip(strings_io, 2) # skip "{\n" write(io, strings_io) # strings contain the trailing "}" so we close out what we opened in preamble end From ffc47bd25f325dac1db6681374c87e3b81843661 Mon Sep 17 00:00:00 2001 From: Nathan Daly Date: Tue, 3 Oct 2023 17:50:10 -0600 Subject: [PATCH 13/13] Remove unnecessary logging --- stdlib/Profile/src/heapsnapshot_reassemble.jl | 26 +------------------ 1 file changed, 1 insertion(+), 25 deletions(-) diff --git a/stdlib/Profile/src/heapsnapshot_reassemble.jl b/stdlib/Profile/src/heapsnapshot_reassemble.jl index 8ec2a2d93c1bf..a6f50f2d34329 100644 --- a/stdlib/Profile/src/heapsnapshot_reassemble.jl +++ b/stdlib/Profile/src/heapsnapshot_reassemble.jl @@ -1,4 +1,4 @@ -# TODO(PR): This code hasn't been reviewed yet. +# This file is a part of Julia. 
License is MIT: https://julialang.org/license module HeapSnapshot @@ -88,19 +88,9 @@ function assemble_snapshot(in_prefix, io::IO) nodes = init_nodes(node_count, edge_count) - shouldlog(i) = false - # N = 100000 - # shouldlog(i) = i % 10000 == 0 - # N *= 10 - # return true - # else - # return false - # end - # Parse nodes with empty edge counts that we need to fill later nodes_file = open(string(in_prefix, ".nodes"), "r") for i in 1:length(nodes) - shouldlog(i) && println("Parsing node $i") node_type = read(nodes_file, Int8) node_name_idx = read(nodes_file, UInt) id = read(nodes_file, UInt) @@ -113,15 +103,6 @@ function assemble_snapshot(in_prefix, io::IO) nodes.id[i] = id nodes.self_size[i] = self_size nodes.edge_count[i] = 0 # edge_count - - shouldlog(i) && begin - println("Parsed node $i") - @show node_type - @show node_name_idx - @show id - @show self_size - @show edge_count - end end # Parse the edges to fill in the edge counts for nodes and correct the to_node offsets @@ -156,7 +137,6 @@ function assemble_snapshot(in_prefix, io::IO) print(io, ",0,0") end print(io, "],\"edges\":[") - shouldloge(i) = i % 10000 == 0 e = 1 for n in 1:length(nodes) count = nodes.edge_count[n] @@ -173,10 +153,6 @@ function assemble_snapshot(in_prefix, io::IO) if !(nodes.edges.to_pos[i] % 7 == 0) @warn "Bug in to_pos for edge $i from node $n: $(nodes.edges.to_pos[i])" end - # shouldloge(i) && println("Edge $i: type $(nodes.edges.type[i])") - # if nodes.edges.type[i] == 2 # "element" (array index) - # println("Array Edge $i: index $(nodes.edges.name_or_index[i])") - # end e += 1 end end