Skip to content

Commit 8276c19

Browse files
committed
COUTIME
1 parent 6e7795c commit 8276c19

File tree

9 files changed

+181
-30
lines changed

9 files changed

+181
-30
lines changed

Include/internal/pycore_global_objects_fini_generated.h

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Include/internal/pycore_global_strings.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -387,6 +387,7 @@ struct _Py_global_strings {
387387
STRUCT_FOR_ID(coro)
388388
STRUCT_FOR_ID(count)
389389
STRUCT_FOR_ID(covariant)
390+
STRUCT_FOR_ID(cpu_time)
390391
STRUCT_FOR_ID(ctx)
391392
STRUCT_FOR_ID(cwd)
392393
STRUCT_FOR_ID(d_parameter_type)

Include/internal/pycore_runtime_init_generated.h

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Include/internal/pycore_unicodeobject_generated.h

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Lib/profiling/sampling/collector.py

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,17 @@
11
from abc import ABC, abstractmethod
22

3+
# Enums are slow
4+
THREAD_STATE_RUNNING = 0
5+
THREAD_STATE_IDLE = 1
6+
THREAD_STATE_GIL_WAIT = 2
7+
THREAD_STATE_UNKNOWN = 3
8+
9+
STATUS = {
10+
THREAD_STATE_RUNNING: "running",
11+
THREAD_STATE_IDLE: "idle",
12+
THREAD_STATE_GIL_WAIT: "gil_wait",
13+
THREAD_STATE_UNKNOWN: "unknown",
14+
}
315

416
class Collector(ABC):
517
@abstractmethod
@@ -12,10 +24,8 @@ def export(self, filename):
1224

1325
def _iter_all_frames(self, stack_frames, skip_idle=False):
1426
"""Iterate over all frame stacks from all interpreters and threads."""
15-
print("Skipping idle threads" if skip_idle else "Including idle threads")
1627
for interpreter_info in stack_frames:
1728
for thread_info in interpreter_info.threads:
18-
print(thread_info.status)
1929
frames = thread_info.frame_info
2030
if frames:
2131
yield frames

Lib/profiling/sampling/sample.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -114,18 +114,18 @@ def _run_with_sync(original_cmd):
114114

115115

116116
class SampleProfiler:
117-
def __init__(self, pid, sample_interval_usec, all_threads):
117+
def __init__(self, pid, sample_interval_usec, all_threads, *, cpu_time=False):
118118
self.pid = pid
119119
self.sample_interval_usec = sample_interval_usec
120120
self.all_threads = all_threads
121121
if _FREE_THREADED_BUILD:
122122
self.unwinder = _remote_debugging.RemoteUnwinder(
123-
self.pid, all_threads=self.all_threads
123+
self.pid, all_threads=self.all_threads, cpu_time=cpu_time
124124
)
125125
else:
126126
only_active_threads = bool(self.all_threads)
127127
self.unwinder = _remote_debugging.RemoteUnwinder(
128-
self.pid, only_active_thread=only_active_threads
128+
self.pid, only_active_thread=only_active_threads, cpu_time=cpu_time
129129
)
130130
# Track sample intervals and total sample count
131131
self.sample_intervals = deque(maxlen=100)
@@ -586,7 +586,7 @@ def sample(
586586
skip_idle=False,
587587
):
588588
profiler = SampleProfiler(
589-
pid, sample_interval_usec, all_threads=all_threads
589+
pid, sample_interval_usec, all_threads=all_threads, cpu_time=skip_idle
590590
)
591591
profiler.realtime_stats = realtime_stats
592592

Lib/test/test_external_inspection.py

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1744,10 +1744,16 @@ def busy():
17441744
except Exception:
17451745
pass
17461746
break
1747-
1748-
unwinder = RemoteUnwinder(p.pid, all_threads=True)
1749-
time.sleep(0.2) # Give a bit of time to let threads settle
1750-
traces = unwinder.get_stack_trace()
1747+
1748+
try:
1749+
unwinder = RemoteUnwinder(p.pid, all_threads=True, cpu_time=True)
1750+
time.sleep(0.2) # Give a bit of time to let threads settle
1751+
traces = unwinder.get_stack_trace()
1752+
except PermissionError:
1753+
self.skipTest(
1754+
"Insufficient permissions to read the stack trace"
1755+
)
1756+
17511757

17521758
# Find threads and their statuses
17531759
statuses = {}

Modules/_remote_debugging_module.c

Lines changed: 143 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,24 @@
4141
#define MAX_NATIVE_THREADS 4096
4242
#endif
4343

44+
#ifdef MS_WINDOWS
45+
#include <windows.h>
46+
#include <winternl.h>
47+
// Define the NTSTATUS values we need
48+
#define STATUS_SUCCESS ((NTSTATUS)0x00000000L)
49+
#define STATUS_INFO_LENGTH_MISMATCH ((NTSTATUS)0xC0000004L)
50+
typedef enum _WIN32_THREADSTATE {
51+
WIN32_THREADSTATE_INITIALIZED = 0, // Recognized by the kernel
52+
WIN32_THREADSTATE_READY = 1, // Prepared to run on the next available processor
53+
WIN32_THREADSTATE_RUNNING = 2, // Currently executing
54+
WIN32_THREADSTATE_STANDBY = 3, // About to run, only one thread may be in this state at a time
55+
WIN32_THREADSTATE_TERMINATED = 4, // Finished executing
56+
WIN32_THREADSTATE_WAITING = 5, // Not ready for the processor, when ready, it will be rescheduled
57+
WIN32_THREADSTATE_TRANSITION = 6, // Waiting for resources other than the processor
58+
WIN32_THREADSTATE_UNKNOWN = 7 // Thread state is unknown
59+
} WIN32_THREADSTATE;
60+
#endif
61+
4462
/* ============================================================================
4563
* TYPE DEFINITIONS AND STRUCTURES
4664
* ============================================================================ */
@@ -219,9 +237,10 @@ typedef struct {
219237
PyTypeObject *AwaitedInfo_Type;
220238
} RemoteDebuggingState;
221239

222-
enum ThreadState {
240+
enum _ThreadState {
223241
THREAD_STATE_RUNNING,
224242
THREAD_STATE_IDLE,
243+
THREAD_STATE_GIL_WAIT,
225244
THREAD_STATE_UNKNOWN
226245
};
227246

@@ -248,6 +267,10 @@ typedef struct {
248267
#ifdef __APPLE__
249268
uint64_t thread_id_offset;
250269
#endif
270+
#ifdef MS_WINDOWS
271+
PVOID win_process_buffer;
272+
ULONG win_process_buffer_size;
273+
#endif
251274
} RemoteUnwinderObject;
252275

253276
#define RemoteUnwinder_CAST(op) ((RemoteUnwinderObject *)(op))
@@ -2472,7 +2495,7 @@ process_frame_chain(
24722495
}
24732496

24742497
static int
2475-
get_thread_status(RemoteUnwinderObject *unwinder, uint64_t tid) {
2498+
get_thread_status(RemoteUnwinderObject *unwinder, uint64_t tid, uint64_t pthread_id) {
24762499
#ifdef __APPLE__
24772500
if (unwinder->thread_id_offset == 0) {
24782501
// Apple is a bit of a circus and doesn't give us a straightforward way to get thread information
@@ -2493,7 +2516,7 @@ get_thread_status(RemoteUnwinderObject *unwinder, uint64_t tid) {
24932516
}
24942517
uint64_t min_offset = UINT64_MAX;
24952518
for (int i = 0; i < n; i++) {
2496-
uint64_t offset = tids[i] - tid;
2519+
uint64_t offset = tids[i] - pthread_id;
24972520
if (offset < min_offset) {
24982521
min_offset = offset;
24992522
}
@@ -2502,14 +2525,92 @@ get_thread_status(RemoteUnwinderObject *unwinder, uint64_t tid) {
25022525
PyMem_Free(tids);
25032526
}
25042527
struct proc_threadinfo ti;
2505-
uint64_t tid_with_offset = tid + unwinder->thread_id_offset;
2528+
uint64_t tid_with_offset = pthread_id + unwinder->thread_id_offset;
25062529
if (proc_pidinfo(unwinder->handle.pid, PROC_PIDTHREADINFO, tid_with_offset, &ti, sizeof(ti)) != sizeof(ti)) {
25072530
return THREAD_STATE_UNKNOWN;
25082531
}
25092532
if (ti.pth_run_state == TH_STATE_RUNNING) {
25102533
return THREAD_STATE_RUNNING;
25112534
}
25122535
return THREAD_STATE_IDLE;
2536+
#elif defined(__linux__)
2537+
char stat_path[256];
2538+
char buffer[2048] = "";
2539+
2540+
snprintf(stat_path, sizeof(stat_path), "/proc/%d/task/%lu/stat", unwinder->handle.pid, tid);
2541+
2542+
int fd = open(stat_path, O_RDONLY);
2543+
if (fd == -1) {
2544+
return THREAD_STATE_UNKNOWN;
2545+
}
2546+
2547+
if (read(fd, buffer, 2047) == 0) {
2548+
close(fd);
2549+
return THREAD_STATE_UNKNOWN;
2550+
}
2551+
close(fd);
2552+
2553+
char *p = strchr(buffer, ')');
2554+
if (!p) {
2555+
return THREAD_STATE_UNKNOWN;
2556+
}
2557+
2558+
p += 2; // Skip ") "
2559+
if (*p == ' ') {
2560+
p++;
2561+
}
2562+
2563+
switch (*p) {
2564+
case 'R': // Running
2565+
return THREAD_STATE_RUNNING;
2566+
case 'S': // Interruptible sleep
2567+
case 'D': // Uninterruptible sleep
2568+
case 'T': // Stopped
2569+
case 'Z': // Zombie
2570+
case 'I': // Idle kernel thread
2571+
return THREAD_STATE_IDLE;
2572+
default:
2573+
return THREAD_STATE_UNKNOWN;
2574+
}
2575+
#elif defined(MS_WINDOWS)
2576+
ULONG n;
2577+
NTSTATUS status = NtQuerySystemInformation(
2578+
SystemProcessInformation,
2579+
unwinder->win_process_buffer,
2580+
unwinder->win_process_buffer_size,
2581+
&n
2582+
);
2583+
if (status == STATUS_INFO_LENGTH_MISMATCH) {
2584+
// Buffer was too small so we reallocate a larger one and try again.
2585+
unwinder->win_process_buffer_size = n;
2586+
PVOID new_buffer = PyMem_Realloc(unwinder->win_process_buffer, n);
2587+
if (!new_buffer) {
2588+
return -1;
2589+
}
2590+
unwinder->win_process_buffer = new_buffer;
2591+
return get_thread_status(unwinder, tid, pthread_id);
2592+
}
2593+
if (status != STATUS_SUCCESS) {
2594+
return -1;
2595+
}
2596+
2597+
SYSTEM_PROCESS_INFORMATION *pi = (SYSTEM_PROCESS_INFORMATION *)unwinder->win_process_buffer;
2598+
while ((ULONG)(ULONG_PTR)pi->UniqueProcessId != unwinder->handle.pid) {
2599+
if (pi->NextEntryOffset == 0) {
2600+
// We didn't find the process
2601+
return -1;
2602+
}
2603+
pi = (SYSTEM_PROCESS_INFORMATION *)(((BYTE *)pi) + pi->NextEntryOffset);
2604+
}
2605+
2606+
SYSTEM_THREAD_INFORMATION *ti = (SYSTEM_THREAD_INFORMATION *)((char *)pi + sizeof(SYSTEM_PROCESS_INFORMATION));
2607+
for (Py_ssize_t i = 0; i < pi->NumberOfThreads; i++, ti++) {
2608+
if (ti->ClientId.UniqueThread == (HANDLE)tid) {
2609+
return ti->ThreadState != WIN32_THREADSTATE_RUNNING ? THREAD_STATE_IDLE : THREAD_STATE_RUNNING;
2610+
}
2611+
}
2612+
2613+
return -1;
25132614
#else
25142615
return THREAD_STATE_UNKNOWN;
25152616
#endif
@@ -2518,7 +2619,8 @@ get_thread_status(RemoteUnwinderObject *unwinder, uint64_t tid) {
25182619
static PyObject*
25192620
unwind_stack_for_thread(
25202621
RemoteUnwinderObject *unwinder,
2521-
uintptr_t *current_tstate
2622+
uintptr_t *current_tstate,
2623+
uintptr_t gil_holder_tstate
25222624
) {
25232625
PyObject *frame_info = NULL;
25242626
PyObject *thread_id = NULL;
@@ -2546,14 +2648,27 @@ unwind_stack_for_thread(
25462648
goto error;
25472649
}
25482650

2651+
long tid = GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.native_thread_id);
2652+
int status = THREAD_STATE_UNKNOWN;
2653+
if (unwinder->cpu_time == 1) {
2654+
long pthread_id = GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.thread_id);
2655+
status = get_thread_status(unwinder, tid, pthread_id);
2656+
if (status == -1) {
2657+
PyErr_Print();
2658+
PyErr_SetString(PyExc_RuntimeError, "Failed to get thread status");
2659+
goto error;
2660+
}
2661+
} else {
2662+
status = (*current_tstate == gil_holder_tstate) ? THREAD_STATE_RUNNING : THREAD_STATE_GIL_WAIT;
2663+
}
2664+
25492665
if (process_frame_chain(unwinder, frame_addr, &chunks, frame_info) < 0) {
25502666
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to process frame chain");
25512667
goto error;
25522668
}
25532669

25542670
*current_tstate = GET_MEMBER(uintptr_t, ts, unwinder->debug_offsets.thread_state.next);
25552671

2556-
long tid = GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.native_thread_id);
25572672
thread_id = PyLong_FromLongLong(tid);
25582673
if (thread_id == NULL) {
25592674
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create thread ID");
@@ -2567,13 +2682,6 @@ unwind_stack_for_thread(
25672682
goto error;
25682683
}
25692684

2570-
long pthread_id = GET_MEMBER(long, ts, unwinder->debug_offsets.thread_state.thread_id);
2571-
int status = get_thread_status(unwinder, pthread_id);
2572-
if (status == -1) {
2573-
PyErr_Print();
2574-
PyErr_SetString(PyExc_RuntimeError, "Failed to get thread status");
2575-
goto error;
2576-
}
25772685
PyObject *py_status = PyLong_FromLong(status);
25782686
if (py_status == NULL) {
25792687
set_exception_cause(unwinder, PyExc_RuntimeError, "Failed to create thread status");
@@ -2740,6 +2848,11 @@ _remote_debugging_RemoteUnwinder___init___impl(RemoteUnwinderObject *self,
27402848
self->thread_id_offset = 0;
27412849
#endif
27422850

2851+
#ifdef MS_WINDOWS
2852+
self->win_process_buffer = NULL;
2853+
self->win_process_buffer_size = 0;
2854+
#endif
2855+
27432856
return 0;
27442857
}
27452858

@@ -2845,21 +2958,25 @@ _remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self
28452958
goto exit;
28462959
}
28472960

2961+
// Get the GIL holder for this interpreter (needed for GIL_WAIT logic)
2962+
uintptr_t gil_holder_tstate = 0;
2963+
int gil_locked = GET_MEMBER(int, interp_state_buffer,
2964+
self->debug_offsets.interpreter_state.gil_runtime_state_locked);
2965+
if (gil_locked) {
2966+
gil_holder_tstate = (uintptr_t)GET_MEMBER(PyThreadState*, interp_state_buffer,
2967+
self->debug_offsets.interpreter_state.gil_runtime_state_holder);
2968+
}
2969+
28482970
uintptr_t current_tstate;
28492971
if (self->only_active_thread) {
28502972
// Find the GIL holder for THIS interpreter
2851-
int gil_locked = GET_MEMBER(int, interp_state_buffer,
2852-
self->debug_offsets.interpreter_state.gil_runtime_state_locked);
2853-
28542973
if (!gil_locked) {
28552974
// This interpreter's GIL is not locked, skip it
28562975
Py_DECREF(interpreter_threads);
28572976
goto next_interpreter;
28582977
}
28592978

2860-
// Get the GIL holder for this interpreter
2861-
current_tstate = (uintptr_t)GET_MEMBER(PyThreadState*, interp_state_buffer,
2862-
self->debug_offsets.interpreter_state.gil_runtime_state_holder);
2979+
current_tstate = gil_holder_tstate;
28632980
} else if (self->tstate_addr == 0) {
28642981
// Get all threads for this interpreter
28652982
current_tstate = GET_MEMBER(uintptr_t, interp_state_buffer,
@@ -2870,7 +2987,7 @@ _remote_debugging_RemoteUnwinder_get_stack_trace_impl(RemoteUnwinderObject *self
28702987
}
28712988

28722989
while (current_tstate != 0) {
2873-
PyObject* frame_info = unwind_stack_for_thread(self, &current_tstate);
2990+
PyObject* frame_info = unwind_stack_for_thread(self, &current_tstate, gil_holder_tstate);
28742991
if (!frame_info) {
28752992
Py_DECREF(interpreter_threads);
28762993
set_exception_cause(self, PyExc_RuntimeError, "Failed to unwind stack for thread");
@@ -3122,6 +3239,12 @@ RemoteUnwinder_dealloc(PyObject *op)
31223239
if (self->code_object_cache) {
31233240
_Py_hashtable_destroy(self->code_object_cache);
31243241
}
3242+
#ifdef MS_WINDOWS
3243+
if(self->win_process_buffer != NULL) {
3244+
PyMem_Free(self->win_process_buffer);
3245+
}
3246+
#endif
3247+
31253248
#ifdef Py_GIL_DISABLED
31263249
if (self->tlbc_cache) {
31273250
_Py_hashtable_destroy(self->tlbc_cache);

0 commit comments

Comments
 (0)