1717
1818#include "pydtrace.h"
1919
20+ // Platform-specific includes for get_current_rss().
21+ #ifdef _WIN32
22+ #include <windows.h>
23+ #include <psapi.h> // For GetProcessMemoryInfo
24+ #elif defined(__linux__ )
25+ #include <unistd.h> // For sysconf, getpid
26+ #elif defined(__APPLE__ )
27+ #include <mach/mach.h>
28+ #include <unistd.h> // For sysconf, getpid
29+ #elif defined(__FreeBSD__ )
30+ #include <sys/types.h>
31+ #include <sys/sysctl.h>
32+ #include <sys/user.h> // Requires sys/user.h for kinfo_proc definition
33+ #include <kvm.h>
34+ #include <unistd.h> // For sysconf, getpid
35+ #include <fcntl.h> // For O_RDONLY
36+ #include <limits.h> // For _POSIX2_LINE_MAX
37+ #elif defined(__OpenBSD__ )
38+ #include <sys/types.h>
39+ #include <sys/sysctl.h>
40+ #include <sys/user.h> // For kinfo_proc
41+ #include <unistd.h> // For sysconf, getpid
42+ #endif
2043
2144// enable the "mark alive" pass of GC
2245#define GC_ENABLE_MARK_ALIVE 1
@@ -1878,6 +1901,180 @@ cleanup_worklist(struct worklist *worklist)
18781901 }
18791902}
18801903
1904+ // Return the current resident set size (RSS) of the process, in units of KB.
1905+ // Returns -1 if this operation is not supported or on failure.
1906+ static Py_ssize_t
1907+ get_current_rss (void )
1908+ {
1909+ #ifdef _WIN32
1910+ // Windows implementation using GetProcessMemoryInfo
1911+ PROCESS_MEMORY_COUNTERS pmc ;
1912+ HANDLE hProcess = GetCurrentProcess ();
1913+ if (NULL == hProcess ) {
1914+ // Should not happen for the current process
1915+ return -1 ;
1916+ }
1917+
1918+ // GetProcessMemoryInfo returns non-zero on success
1919+ if (GetProcessMemoryInfo (hProcess , & pmc , sizeof (pmc ))) {
1920+ // pmc.WorkingSetSize is in bytes. Convert to KB.
1921+ return (Py_ssize_t )(pmc .WorkingSetSize / 1024 );
1922+ }
1923+ else {
1924+ return -1 ;
1925+ }
1926+
1927+ #elif __linux__
1928+ // Linux implementation using /proc/self/statm
1929+ long page_size_bytes = sysconf (_SC_PAGE_SIZE );
1930+ if (page_size_bytes <= 0 ) {
1931+ return -1 ;
1932+ }
1933+
1934+ FILE * fp = fopen ("/proc/self/statm" , "r" );
1935+ if (fp == NULL ) {
1936+ return -1 ;
1937+ }
1938+
1939+ // Second number is resident size in pages
1940+ long rss_pages ;
1941+ if (fscanf (fp , "%*d %ld" , & rss_pages ) != 1 ) {
1942+ fclose (fp );
1943+ return -1 ;
1944+ }
1945+ fclose (fp );
1946+
1947+ // Sanity check
1948+ if (rss_pages < 0 || rss_pages > 1000000000 ) {
1949+ return -1 ;
1950+ }
1951+
1952+ // Convert unit to KB
1953+ return (Py_ssize_t )rss_pages * (page_size_bytes / 1024 );
1954+
1955+ #elif defined(__APPLE__ )
1956+ // --- MacOS (Darwin) ---
1957+ mach_msg_type_number_t count = MACH_TASK_BASIC_INFO_COUNT ;
1958+ mach_task_basic_info_data_t info ;
1959+ kern_return_t kerr ;
1960+
1961+ kerr = task_info (mach_task_self (), MACH_TASK_BASIC_INFO , (task_info_t )& info , & count );
1962+ if (kerr != KERN_SUCCESS ) {
1963+ return -1 ;
1964+ }
1965+ // info.resident_size is in bytes. Convert to KB.
1966+ return (Py_ssize_t )(info .resident_size / 1024 );
1967+
1968+ #elif defined(__FreeBSD__ )
1969+ long page_size_kb = sysconf (_SC_PAGESIZE ) / 1024 ;
1970+ if (page_size_kb <= 0 ) {
1971+ return -1 ;
1972+ }
1973+
1974+ // Using /dev/null for vmcore avoids needing dump file.
1975+ // NULL for kernel file uses running kernel.
1976+ char errbuf [_POSIX2_LINE_MAX ]; // For kvm error messages
1977+ kvm_t * kd = kvm_openfiles (NULL , "/dev/null" , NULL , O_RDONLY , errbuf );
1978+ if (kd == NULL ) {
1979+ return -1 ;
1980+ }
1981+
1982+ // KERN_PROC_PID filters for the specific process ID
1983+ // n_procs will contain the number of processes returned (should be 1 or 0)
1984+ pid_t pid = getpid ();
1985+ int n_procs ;
1986+ struct kinfo_proc * kp = kvm_getprocs (kd , KERN_PROC_PID , pid , & n_procs );
1987+ if (kp == NULL ) {
1988+ kvm_close (kd );
1989+ return -1 ;
1990+ }
1991+
1992+ Py_ssize_t rss_kb = -1 ;
1993+ if (n_procs > 0 ) {
1994+ // kp[0] contains the info for our process
1995+ // ki_rssize is in pages. Convert to KB.
1996+ rss_kb = (Py_ssize_t )kp -> ki_rssize * page_size_kb ;
1997+ }
1998+ else {
1999+ // Process with PID not found, shouldn't happen for self.
2000+ rss_kb = -1 ;
2001+ }
2002+
2003+ kvm_close (kd );
2004+ return rss_kb ;
2005+
2006+ #elif defined(__OpenBSD__ )
2007+ long page_size_kb = sysconf (_SC_PAGESIZE ) / 1024 ;
2008+ if (page_size_kb <= 0 ) {
2009+ return -1 ;
2010+ }
2011+
2012+ struct kinfo_proc kp ;
2013+ pid_t pid = getpid ();
2014+ int mib [6 ];
2015+ size_t len = sizeof (kp );
2016+
2017+ mib [0 ] = CTL_KERN ;
2018+ mib [1 ] = KERN_PROC ;
2019+ mib [2 ] = KERN_PROC_PID ;
2020+ mib [3 ] = pid ;
2021+ mib [4 ] = sizeof (struct kinfo_proc ); // size of the structure we want
2022+ mib [5 ] = 1 ; // want 1 structure back
2023+ if (sysctl (mib , 6 , & kp , & len , NULL , 0 ) == -1 ) {
2024+ return -1 ;
2025+ }
2026+
2027+ if (len > 0 ) {
2028+ // p_vm_rssize is in pages on OpenBSD. Convert to KB.
2029+ return (Py_ssize_t )kp .p_vm_rssize * page_size_kb ;
2030+ }
2031+ else {
2032+ // Process info not returned
2033+ return -1 ;
2034+ }
2035+ #else
2036+ // Unsupported platform
2037+ return -1 ;
2038+ #endif
2039+ }
2040+
2041+ static bool
2042+ gc_should_collect_rss (GCState * gcstate )
2043+ {
2044+ Py_ssize_t rss = get_current_rss ();
2045+ if (rss < 0 ) {
2046+ // Reading RSS is not support or failed.
2047+ return true;
2048+ }
2049+ int threshold = gcstate -> young .threshold ;
2050+ Py_ssize_t deferred = _Py_atomic_load_ssize_relaxed (& gcstate -> deferred_count );
2051+ if (deferred > threshold * 40 ) {
2052+ // Too many new container objects since last GC, even though RSS
2053+ // might not have increased much. This is intended to avoid resource
2054+ // exhaustion if some objects consume resources but don't result in a
2055+ // RSS increase. We use 40x as the factor here because older versions
2056+ // of Python would do full collections after roughly every 70,000 new
2057+ // container objects.
2058+ return true;
2059+ }
2060+ Py_ssize_t last_rss = gcstate -> last_rss ;
2061+ Py_ssize_t rss_threshold = Py_MAX (last_rss / 10 , 128 );
2062+ if ((rss - last_rss ) > rss_threshold ) {
2063+ // The RSS has increased too much, do a collection.
2064+ return true;
2065+ }
2066+ else {
2067+ // The RSS has not increased enough, defer the collection and clear
2068+ // the young object count so we don't check RSS again on the next call
2069+ // to gc_should_collect().
2070+ PyMutex_Lock (& gcstate -> mutex );
2071+ gcstate -> deferred_count += gcstate -> young .count ;
2072+ gcstate -> young .count = 0 ;
2073+ PyMutex_Unlock (& gcstate -> mutex );
2074+ return false;
2075+ }
2076+ }
2077+
18812078static bool
18822079gc_should_collect (GCState * gcstate )
18832080{
@@ -1887,11 +2084,17 @@ gc_should_collect(GCState *gcstate)
18872084 if (count <= threshold || threshold == 0 || !gc_enabled ) {
18882085 return false;
18892086 }
1890- // Avoid quadratic behavior by scaling threshold to the number of live
1891- // objects. A few tests rely on immediate scheduling of the GC so we ignore
1892- // the scaled threshold if generations[1].threshold is set to zero.
1893- return (count > gcstate -> long_lived_total / 4 ||
1894- gcstate -> old [0 ].threshold == 0 );
2087+ if (gcstate -> old [0 ].threshold == 0 ) {
2088+ // A few tests rely on immediate scheduling of the GC so we ignore the
2089+ // extra conditions if generations[1].threshold is set to zero.
2090+ return true;
2091+ }
2092+ if (count < gcstate -> long_lived_total / 4 ) {
2093+ // Avoid quadratic behavior by scaling threshold to the number of live
2094+ // objects.
2095+ return false;
2096+ }
2097+ return gc_should_collect_rss (gcstate );
18952098}
18962099
18972100static void
@@ -1940,6 +2143,7 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
19402143 }
19412144
19422145 state -> gcstate -> young .count = 0 ;
2146+ state -> gcstate -> deferred_count = 0 ;
19432147 for (int i = 1 ; i <= generation ; ++ i ) {
19442148 state -> gcstate -> old [i - 1 ].count = 0 ;
19452149 }
@@ -2033,6 +2237,9 @@ gc_collect_internal(PyInterpreterState *interp, struct collection_state *state,
20332237 // to be freed.
20342238 delete_garbage (state );
20352239
2240+ // Store the current RSS, possibly smaller now that we deleted garbage.
2241+ state -> gcstate -> last_rss = get_current_rss ();
2242+
20362243 // Append objects with legacy finalizers to the "gc.garbage" list.
20372244 handle_legacy_finalizers (state );
20382245}
0 commit comments