Skip to content

Commit 2ddd16f

Browse files
committed
profiling: Add Task/GC/Inference/Codegen signposts for Tracy
This adds a variety of metadata to send to Tracy during profiling, in particular method instance signatures, the Heap size, and a couple of other small pieces of relevant metadata. It also adds full support for Task execution, tracking these as "Fibers" on the Tracy side. Unfortunately, enabling this is blocked on a bug on the Tracy side. The options for resolution are: A. Disable any use of Fibers. Without fibers, asynchronous workloads that generate zones across threads are unsupported and will in general break the trace. B. Disable `TRACY_ON_DEMAND`, which means that profiling data will queue up from program start, even if a client is not connected. C. Try to implement the missing support for ON_DEMAND + FIBERS upstream. (A) is what this PR does for now. Thankfully, instrumented calls in the run-time generally don't span threads so things are at least functional this way, but it'd be very nice to have a proper fix.
1 parent ec123a3 commit 2ddd16f

File tree

10 files changed

+141
-34
lines changed

10 files changed

+141
-34
lines changed

src/ast.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -784,6 +784,11 @@ JL_DLLEXPORT jl_value_t *jl_fl_parse(const char *text, size_t text_len,
784784
size_t offset, jl_value_t *options)
785785
{
786786
JL_TIMING(PARSING);
787+
#ifdef USE_TRACY
788+
TracyCZoneText(*__timing_block.tracy_ctx,
789+
jl_string_data(filename),
790+
jl_string_len(filename));
791+
#endif
787792
if (offset > text_len) {
788793
jl_value_t *textstr = jl_pchar_to_string(text, text_len);
789794
JL_GC_PUSH1(&textstr);

src/codegen.cpp

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8437,6 +8437,10 @@ jl_llvm_functions_t jl_emit_code(
84378437
jl_codegen_params_t &params)
84388438
{
84398439
JL_TIMING(CODEGEN);
8440+
#ifdef USE_TRACY
8441+
jl_timing_show((jl_value_t *)li->specTypes,
8442+
JL_TIMING_CURRENT_BLOCK);
8443+
#endif
84408444
// caller must hold codegen_lock
84418445
jl_llvm_functions_t decls = {};
84428446
assert((params.params == &jl_default_cgparams /* fast path */ || !params.cache ||
@@ -8479,6 +8483,10 @@ jl_llvm_functions_t jl_emit_codeinst(
84798483
jl_codegen_params_t &params)
84808484
{
84818485
JL_TIMING(CODEGEN);
8486+
#ifdef USE_TRACY
8487+
jl_timing_show((jl_value_t *)codeinst->def->specTypes,
8488+
JL_TIMING_CURRENT_BLOCK);
8489+
#endif
84828490
JL_GC_PUSH1(&src);
84838491
if (!src) {
84848492
src = (jl_code_info_t*)jl_atomic_load_relaxed(&codeinst->inferred);

src/gc.c

Lines changed: 7 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -3108,18 +3108,9 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
31083108
return;
31093109
}
31103110
#ifdef USE_TRACY
3111-
static uint8_t first_time = 1;
3112-
if (first_time) {
3113-
first_time = 0;
3114-
TracyCFiberEnter("Main");
3115-
}
3116-
TracyCFiberLeave;
3117-
TracyCFiberEnter("GC");
3118-
{
3119-
int64_t tb;
3120-
jl_gc_get_total_bytes(&tb);
3121-
TracyCPlot("Heap size", ((double)tb) / (1024.0 * 1024.0));
3122-
}
3111+
//TracyCFiberLeave;
3112+
//TracyCFiberEnter("GC");
3113+
TracyCPlot("Heap size (MB)", ((double)jl_gc_live_bytes()) / (1024.0 * 1024.0));
31233114
#endif
31243115
{
31253116
JL_TIMING(GC);
@@ -3185,15 +3176,11 @@ JL_DLLEXPORT void jl_gc_collect(jl_gc_collection_t collection)
31853176
SetLastError(last_error);
31863177
#endif
31873178
errno = last_errno;
3188-
}
3179+
} // TODO: Better RAII (maybe use a dummy constructor RAII type)
31893180
#ifdef USE_TRACY
3190-
{
3191-
int64_t tb;
3192-
jl_gc_get_total_bytes(&tb);
3193-
TracyCPlot("Heap size", ((double)tb) / (1024.0 * 1024.0));
3194-
}
3195-
TracyCFiberLeave;
3196-
TracyCFiberEnter("Main");
3181+
TracyCPlot("Heap size (MB)", ((double)jl_gc_live_bytes()) / (1024.0 * 1024.0));
3182+
//TracyCFiberLeave;
3183+
//TracyCFiberEnter(ct->name);
31973184
#endif
31983185
}
31993186

src/gf.c

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -312,6 +312,9 @@ jl_datatype_t *jl_mk_builtin_func(jl_datatype_t *dt, const char *name, jl_fptr_a
312312
jl_code_info_t *jl_type_infer(jl_method_instance_t *mi, size_t world, int force)
313313
{
314314
JL_TIMING(INFERENCE);
315+
#ifdef USE_TRACY
316+
jl_timing_show(mi->specTypes, JL_TIMING_CURRENT_BLOCK);
317+
#endif
315318
if (jl_typeinf_func == NULL)
316319
return NULL;
317320
jl_task_t *ct = jl_current_task;
@@ -1923,6 +1926,9 @@ JL_DLLEXPORT void jl_method_table_insert(jl_methtable_t *mt, jl_method_t *method
19231926
JL_TIMING(ADD_METHOD);
19241927
assert(jl_is_method(method));
19251928
assert(jl_is_mtable(mt));
1929+
#ifdef USE_TRACY
1930+
jl_timing_show((jl_value_t *)method, JL_TIMING_CURRENT_BLOCK);
1931+
#endif
19261932
jl_value_t *type = method->sig;
19271933
jl_value_t *oldvalue = NULL;
19281934
jl_array_t *oldmi = NULL;

src/jitlayers.cpp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,8 @@ static jl_callptr_t _jl_compile_codeinst(
192192
"invalid world for method-instance");
193193
assert(src && jl_is_code_info(src));
194194

195+
JL_TIMING(LLVM_MODULE_FINISH);
196+
195197
jl_callptr_t fptr = NULL;
196198
// emit the code in LLVM IR form
197199
jl_codegen_params_t params(std::move(context), jl_ExecutionEngine->getDataLayout(), jl_ExecutionEngine->getTargetTriple()); // Locks the context
@@ -245,10 +247,12 @@ static jl_callptr_t _jl_compile_codeinst(
245247
MaxWorkqueueSize.updateMax(emitted.size());
246248
IndirectCodeinsts += emitted.size() - 1;
247249
}
248-
JL_TIMING(LLVM_MODULE_FINISH);
249250

250251
for (auto &def : emitted) {
251252
jl_code_instance_t *this_code = def.first;
253+
#ifdef USE_TRACY
254+
jl_timing_show(this_code->def->specTypes, JL_TIMING_CURRENT_BLOCK);
255+
#endif
252256
jl_llvm_functions_t decls = std::get<1>(def.second);
253257
jl_callptr_t addr;
254258
bool isspecsig = false;

src/julia.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1931,6 +1931,10 @@ typedef struct _jl_task_t {
19311931
uint16_t priority;
19321932

19331933
// hidden state:
1934+
1935+
#ifdef USE_TRACY
1936+
const char *name;
1937+
#endif
19341938
// id of owning thread - does not need to be defined until the task runs
19351939
_Atomic(int16_t) tid;
19361940
// threadpool id

src/task.c

Lines changed: 39 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -640,6 +640,9 @@ JL_DLLEXPORT void jl_switch(void) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER
640640
jl_error("cannot switch to task running on another thread");
641641

642642
JL_PROBE_RT_PAUSE_TASK(ct);
643+
//#ifdef USE_TRACY
644+
//TracyCFiberLeave;
645+
//#endif
643646

644647
// Store old values on the stack and reset
645648
sig_atomic_t defer_signal = ptls->defer_signal;
@@ -688,6 +691,9 @@ JL_DLLEXPORT void jl_switch(void) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER
688691
jl_sigint_safepoint(ptls);
689692

690693
JL_PROBE_RT_RUN_TASK(ct);
694+
//#ifdef USE_TRACY
695+
//TracyCFiberEnter(ct->name);
696+
//#endif
691697
jl_gc_unsafe_leave(ptls, gc_state);
692698
}
693699

@@ -923,6 +929,23 @@ JL_DLLEXPORT jl_task_t *jl_new_task(jl_function_t *start, jl_value_t *completion
923929
t->start = start;
924930
t->result = jl_nothing;
925931
t->donenotify = completion_future;
932+
#ifdef USE_TRACY
933+
jl_value_t *start_type = jl_typeof(t->start);
934+
const char *start_name = "";
935+
if (jl_is_datatype(start_type))
936+
start_name = jl_symbol_name(((jl_datatype_t *) start_type)->name->name);
937+
938+
// 16 characters in "Task 65535 (\"\")\0";
939+
static uint16_t task_id = 1;
940+
size_t fiber_name_len = strlen(start_name) + 16;
941+
942+
// XXX: Tracy uses this as a handle internally and requires that this
943+
// string live forever, so this allocation is intentionally leaked.
944+
char *fiber_name = (char *)malloc(fiber_name_len);
945+
snprintf(fiber_name, fiber_name_len, "Task %d (\"%s\")", task_id++, start_name);
946+
947+
t->name = fiber_name;
948+
#endif
926949
jl_atomic_store_relaxed(&t->_isexception, 0);
927950
// Inherit logger state from parent task
928951
t->logstate = ct->logstate;
@@ -1078,6 +1101,9 @@ CFI_NORETURN
10781101

10791102
ct->started = 1;
10801103
JL_PROBE_RT_START_TASK(ct);
1104+
//#ifdef USE_TRACY
1105+
//TracyCFiberEnter(ct->name);
1106+
//#endif
10811107
if (jl_atomic_load_relaxed(&ct->_isexception)) {
10821108
record_backtrace(ptls, 0);
10831109
jl_push_excstack(&ct->excstack, ct->result,
@@ -1090,14 +1116,9 @@ CFI_NORETURN
10901116
ptls->defer_signal = 0;
10911117
jl_sigint_safepoint(ptls);
10921118
}
1093-
//#ifdef USE_TRACY
1094-
//TracyFiberEnter(fiber);
1095-
//#endif
1096-
{
1097-
// TODO: Re-enable
1098-
//JL_TIMING(ROOT);
1099-
res = jl_apply(&ct->start, 1);
1100-
}
1119+
// TODO: Re-enable
1120+
//JL_TIMING(ROOT);
1121+
res = jl_apply(&ct->start, 1);
11011122
}
11021123
JL_CATCH {
11031124
res = jl_current_exception();
@@ -1106,9 +1127,6 @@ CFI_NORETURN
11061127
}
11071128
skip_pop_exception:;
11081129
}
1109-
//#ifdef USE_TRACY
1110-
//TracyFiberLeave(fiber);
1111-
//#endif
11121130
ct->result = res;
11131131
jl_gc_wb(ct, ct->result);
11141132
jl_finish_task(ct);
@@ -1519,6 +1537,12 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
15191537
ct->stkbuf = stack;
15201538
ct->bufsz = ssize;
15211539
}
1540+
1541+
#ifdef USE_TRACY
1542+
char *unique_string = (char *)malloc(strlen("Root") + 1);
1543+
strcpy(unique_string, "Root");
1544+
ct->name = unique_string;
1545+
#endif // USE_TRACY
15221546
ct->started = 1;
15231547
ct->next = jl_nothing;
15241548
ct->queue = jl_nothing;
@@ -1551,6 +1575,10 @@ jl_task_t *jl_init_root_task(jl_ptls_t ptls, void *stack_lo, void *stack_hi)
15511575
ct->ctx.asan_fake_stack = NULL;
15521576
#endif
15531577

1578+
//#ifdef USE_TRACY
1579+
//TracyCFiberEnter(ct->name);
1580+
//#endif
1581+
15541582
#ifdef COPY_STACKS
15551583
// initialize the base_ctx from which all future copy_stacks will be copies
15561584
if (always_copy_stacks) {

src/timing.c

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ extern "C" {
1818
#endif
1919

2020
static uint64_t t0;
21+
JL_DLLEXPORT uint64_t jl_timing_enable_mask = 0xFFFFFFFFFFFFFFFE;
2122
JL_DLLEXPORT uint64_t jl_timing_data[(int)JL_TIMING_LAST] = {0};
2223
const char *jl_timing_names[(int)JL_TIMING_LAST] =
2324
{
@@ -72,6 +73,43 @@ void jl_timing_block_stop(jl_timing_block_t *cur_block)
7273
_jl_timing_block_stop(cur_block, cycleclock());
7374
}
7475

76+
JL_DLLEXPORT void jl_timing_show(jl_value_t *v, jl_timing_block_t *cur_block) {
77+
#ifdef USE_TRACY
78+
ios_t buf;
79+
ios_mem(&buf, IOS_INLSIZE);
80+
buf.growable = 0; // Restrict to inline buffer to avoid allocation
81+
82+
jl_static_show((JL_STREAM*)&buf, v);
83+
if (buf.size == buf.maxsize)
84+
memset(&buf.buf[IOS_INLSIZE - 3], '.', 3);
85+
86+
TracyCZoneText(*(cur_block->tracy_ctx), buf.buf, buf.size);
87+
#endif
88+
}
89+
90+
JL_DLLEXPORT uint64_t jl_timing_get_enable_mask(void) {
91+
return jl_timing_enable_mask;
92+
}
93+
94+
JL_DLLEXPORT void jl_timing_set_enable_mask(uint64_t mask) {
95+
jl_timing_enable_mask = mask;
96+
}
97+
98+
JL_DLLEXPORT int jl_timing_set_enable(const char *subsystem, uint8_t enabled) {
99+
for (int i = 0; i < JL_TIMING_LAST; i++) {
100+
if (strcmp(subsystem, jl_timing_names[i]) == 0) {
101+
uint64_t subsystem_bit = (1ul << i);
102+
if (enabled) {
103+
jl_timing_enable_mask |= subsystem_bit;
104+
} else {
105+
jl_timing_enable_mask &= subsystem_bit;
106+
}
107+
return 0;
108+
}
109+
}
110+
return -1;
111+
}
112+
75113
#else
76114

77115
void jl_init_timing(void) { }

src/timing.h

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,22 @@ void jl_print_timings(void);
2828
jl_timing_block_t *jl_pop_timing_block(jl_timing_block_t *cur_block);
2929
void jl_timing_block_start(jl_timing_block_t *cur_block);
3030
void jl_timing_block_stop(jl_timing_block_t *cur_block);
31+
32+
// Add the output of `jl_static_show(x)` as a text annotation to the
33+
// zone corresponding to `ctx`.
34+
//
35+
// If larger than IOS_INLSIZE (~80 characters), text is truncated.
36+
void jl_timing_show(jl_value_t *v, jl_timing_block_t *cur_block);
37+
38+
// TODO: Replace this with an atomic API
39+
uint64_t jl_timing_get_enable_mask(void);
40+
void jl_timing_set_enable_mask(uint64_t mask);
41+
42+
// Update the enable bit-mask to enable/disable tracing events for
43+
// the subsystem in `jl_timing_names` matching the provided string.
44+
//
45+
// Returns -1 if no matching sub-system was found.
46+
int jl_timing_set_enable(const char *subsystem, uint8_t enabled);
3147
#ifdef __cplusplus
3248
}
3349
#endif
@@ -44,6 +60,12 @@ void jl_timing_block_stop(jl_timing_block_t *cur_block);
4460
#define JL_TIMING(owner)
4561
#else
4662

63+
#ifdef __cplusplus
64+
#define JL_TIMING_CURRENT_BLOCK (&__timing_block.block)
65+
#else
66+
#define JL_TIMING_CURRENT_BLOCK (&__timing_block)
67+
#endif
68+
4769
#define JL_TIMING_OWNERS \
4870
X(ROOT), \
4971
X(GC), \
@@ -77,6 +99,7 @@ enum jl_timing_owners {
7799
JL_TIMING_LAST
78100
};
79101

102+
extern uint64_t jl_timing_enable_mask;
80103
extern uint64_t jl_timing_data[(int)JL_TIMING_LAST];
81104
extern const char *jl_timing_names[(int)JL_TIMING_LAST];
82105

@@ -161,7 +184,7 @@ struct jl_timing_block_cpp_t {
161184
};
162185
#ifdef USE_TRACY
163186
#define JL_TIMING(owner) jl_timing_block_cpp_t __timing_block(JL_TIMING_ ## owner); \
164-
TracyCZoneN(__tracy_ctx, #owner, strcmp(#owner, "ROOT")); \
187+
TracyCZoneN(__tracy_ctx, #owner, (jl_timing_enable_mask >> (JL_TIMING_ ## owner)) & 1); \
165188
__timing_block.block.tracy_ctx = &__tracy_ctx;
166189
#else
167190
#define JL_TIMING(owner) jl_timing_block_cpp_t __timing_block(JL_TIMING_ ## owner)
@@ -172,7 +195,7 @@ struct jl_timing_block_cpp_t {
172195
__attribute__((cleanup(_jl_timing_block_destroy))) \
173196
jl_timing_block_t __timing_block; \
174197
_jl_timing_block_ctor(&__timing_block, JL_TIMING_ ## owner); \
175-
TracyCZoneN(__tracy_ctx, #owner, 1); \
198+
TracyCZoneN(__tracy_ctx, #owner, (jl_timing_enable_mask >> (JL_TIMING_ ## owner)) & 1); \
176199
__timing_block.tracy_ctx = &__tracy_ctx;
177200
#else
178201
#define JL_TIMING(owner) \

src/toplevel.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,10 @@ static jl_function_t *jl_module_get_initializer(jl_module_t *m JL_PROPAGATES_ROO
6565
void jl_module_run_initializer(jl_module_t *m)
6666
{
6767
JL_TIMING(INIT_MODULE);
68+
#ifdef USE_TRACY
69+
const char *module_name = jl_symbol_name(m->name);
70+
TracyCZoneText(*__timing_block.tracy_ctx, module_name, strlen(module_name));
71+
#endif
6872
jl_function_t *f = jl_module_get_initializer(m);
6973
if (f == NULL)
7074
return;

0 commit comments

Comments
 (0)