diff --git a/src/coreclr/gc/unix/gcenv.unix.cpp b/src/coreclr/gc/unix/gcenv.unix.cpp index 8b536ad9dd7d82..4ad5f4b3795e80 100644 --- a/src/coreclr/gc/unix/gcenv.unix.cpp +++ b/src/coreclr/gc/unix/gcenv.unix.cpp @@ -876,21 +876,29 @@ bool ReadMemoryValueFromFile(const char* filename, uint64_t* val) return result; } +#define UPDATE_CACHE_SIZE_AND_LEVEL(CACHE_LEVEL) if (size > cacheSize) { cacheSize = size; cacheLevel = CACHE_LEVEL; } + static size_t GetLogicalProcessorCacheSizeFromOS() { + size_t cacheLevel = 0; size_t cacheSize = 0; + size_t size; #ifdef _SC_LEVEL1_DCACHE_SIZE - cacheSize = std::max(cacheSize, ( size_t) sysconf(_SC_LEVEL1_DCACHE_SIZE)); + size = ( size_t) sysconf(_SC_LEVEL1_DCACHE_SIZE); + UPDATE_CACHE_SIZE_AND_LEVEL(1) #endif #ifdef _SC_LEVEL2_CACHE_SIZE - cacheSize = std::max(cacheSize, ( size_t) sysconf(_SC_LEVEL2_CACHE_SIZE)); + size = ( size_t) sysconf(_SC_LEVEL2_CACHE_SIZE); + UPDATE_CACHE_SIZE_AND_LEVEL(2) #endif #ifdef _SC_LEVEL3_CACHE_SIZE - cacheSize = std::max(cacheSize, ( size_t) sysconf(_SC_LEVEL3_CACHE_SIZE)); + size = ( size_t) sysconf(_SC_LEVEL3_CACHE_SIZE); + UPDATE_CACHE_SIZE_AND_LEVEL(3) #endif #ifdef _SC_LEVEL4_CACHE_SIZE - cacheSize = std::max(cacheSize, ( size_t) sysconf(_SC_LEVEL4_CACHE_SIZE)); + size = ( size_t) sysconf(_SC_LEVEL4_CACHE_SIZE); + UPDATE_CACHE_SIZE_AND_LEVEL(4) #endif #if defined(TARGET_LINUX) && !defined(HOST_ARM) && !defined(HOST_X86) @@ -901,25 +909,39 @@ static size_t GetLogicalProcessorCacheSizeFromOS() // for the platform. Currently musl and arm64 should be only cases to use // this method to determine cache size. 
// - size_t size; - - if (ReadMemoryValueFromFile("/sys/devices/system/cpu/cpu0/cache/index0/size", &size)) - cacheSize = std::max(cacheSize, size); - if (ReadMemoryValueFromFile("/sys/devices/system/cpu/cpu0/cache/index1/size", &size)) - cacheSize = std::max(cacheSize, size); - if (ReadMemoryValueFromFile("/sys/devices/system/cpu/cpu0/cache/index2/size", &size)) - cacheSize = std::max(cacheSize, size); - if (ReadMemoryValueFromFile("/sys/devices/system/cpu/cpu0/cache/index3/size", &size)) - cacheSize = std::max(cacheSize, size); - if (ReadMemoryValueFromFile("/sys/devices/system/cpu/cpu0/cache/index4/size", &size)) - cacheSize = std::max(cacheSize, size); + size_t level; + char path_to_size_file[] = "/sys/devices/system/cpu/cpu0/cache/index-/size"; + char path_to_level_file[] = "/sys/devices/system/cpu/cpu0/cache/index-/level"; + int index = 40; + assert(path_to_size_file[index] == '-'); + assert(path_to_level_file[index] == '-'); + + for (int i = 0; i < 5; i++) + { + path_to_size_file[index] = (char)(48 + i); + + if (ReadMemoryValueFromFile(path_to_size_file, &size)) + { + path_to_level_file[index] = (char)(48 + i); + + if (ReadMemoryValueFromFile(path_to_level_file, &level)) + { + UPDATE_CACHE_SIZE_AND_LEVEL(level) + } + else + { + cacheSize = std::max(cacheSize, size); + } + } + } } #endif -#if defined(HOST_ARM64) && !defined(TARGET_OSX) +#if (defined(HOST_ARM64) || defined(HOST_LOONGARCH64)) && !defined(TARGET_OSX) if (cacheSize == 0) { - // It is currently expected to be missing cache size info + // We expect to get the L3 cache size for Arm64 but currently expected to be missing that info + // from most of the machines. // // _SC_LEVEL*_*CACHE_SIZE is not yet present. 
Work is in progress to enable this for arm64 // @@ -964,6 +986,38 @@ static size_t GetLogicalProcessorCacheSizeFromOS() } #endif +#if (defined(HOST_ARM64) || defined(HOST_LOONGARCH64)) && !defined(TARGET_OSX) + if (cacheLevel != 3) + { + // We expect to get the L3 cache size for Arm64 but currently expected to be missing that info + // from most of the machines. + // Hence, just use the following heuristics at best depending on the CPU count + // 1 ~ 4 : 4 MB + // 5 ~ 16 : 8 MB + // 17 ~ 64 : 16 MB + // 65+ : 32 MB + DWORD logicalCPUs = g_totalCpuCount; + if (logicalCPUs < 5) + { + cacheSize = 4; + } + else if (logicalCPUs < 17) + { + cacheSize = 8; + } + else if (logicalCPUs < 65) + { + cacheSize = 16; + } + else + { + cacheSize = 32; + } + + cacheSize *= (1024 * 1024); + } +#endif + return cacheSize; } @@ -1037,15 +1091,10 @@ size_t GCToOSInterface::GetCacheSizePerLogicalCpu(bool trueSize) size_t maxSize, maxTrueSize; maxSize = maxTrueSize = GetLogicalProcessorCacheSizeFromOS(); // Returns the size of the highest level processor cache -#if defined(HOST_ARM64) - // Bigger gen0 size helps arm64 targets - maxSize = maxTrueSize * 3; -#endif - s_maxSize = maxSize; s_maxTrueSize = maxTrueSize; - // printf("GetCacheSizePerLogicalCpu returns %d, adjusted size %d\n", maxSize, maxTrueSize); + // printf("GetCacheSizePerLogicalCpu returns %zu, adjusted size %zu\n", maxSize, maxTrueSize); return trueSize ? maxTrueSize : maxSize; } diff --git a/src/coreclr/gc/windows/gcenv.windows.cpp b/src/coreclr/gc/windows/gcenv.windows.cpp index 8bb35badd6c2fa..ca7cc4132e5eee 100644 --- a/src/coreclr/gc/windows/gcenv.windows.cpp +++ b/src/coreclr/gc/windows/gcenv.windows.cpp @@ -402,6 +402,8 @@ SYSTEM_LOGICAL_PROCESSOR_INFORMATION *GetLPI(PDWORD nEntries) size_t GetLogicalProcessorCacheSizeFromOS() { size_t cache_size = 0; + size_t cache_level = 0; + DWORD nEntries = 0; // Try to use GetLogicalProcessorInformation API and get a valid pointer to the SLPI array if successful. 
Returns NULL @@ -424,7 +426,11 @@ size_t GetLogicalProcessorCacheSizeFromOS() { if (pslpi[i].Relationship == RelationCache) { - last_cache_size = max(last_cache_size, pslpi[i].Cache.Size); + if (last_cache_size < pslpi[i].Cache.Size) + { + last_cache_size = pslpi[i].Cache.Size; + cache_level = pslpi[i].Cache.Level; + } } } cache_size = last_cache_size; @@ -434,6 +440,39 @@ size_t GetLogicalProcessorCacheSizeFromOS() if(pslpi) delete[] pslpi; // release the memory allocated for the SLPI array. +#if defined(TARGET_ARM64) + if (cache_level != 3) + { + uint32_t totalCPUCount = GCToOSInterface::GetTotalProcessorCount(); + + // We expect to get the L3 cache size for Arm64 but currently expected to be missing that info + // from most of the machines. + // Hence, just use the following heuristics at best depending on the CPU count + // 1 ~ 4 : 4 MB + // 5 ~ 16 : 8 MB + // 17 ~ 64 : 16 MB + // 65+ : 32 MB + if (totalCPUCount < 5) + { + cache_size = 4; + } + else if (totalCPUCount < 17) + { + cache_size = 8; + } + else if (totalCPUCount < 65) + { + cache_size = 16; + } + else + { + cache_size = 32; + } + + cache_size *= (1024 * 1024); + } +#endif // TARGET_ARM64 + return cache_size; } @@ -836,15 +875,10 @@ size_t GCToOSInterface::GetCacheSizePerLogicalCpu(bool trueSize) maxSize = maxTrueSize = GetLogicalProcessorCacheSizeFromOS() ; // Returns the size of the highest level processor cache -#if defined(TARGET_ARM64) - // Bigger gen0 size helps arm64 targets - maxSize = maxTrueSize * 3; -#endif - s_maxSize = maxSize; s_maxTrueSize = maxTrueSize; - // printf("GetCacheSizePerLogicalCpu returns %d, adjusted size %d\n", maxSize, maxTrueSize); + // printf("GetCacheSizePerLogicalCpu returns %zu, adjusted size %zu\n", maxSize, maxTrueSize); return trueSize ? 
maxTrueSize : maxSize; } diff --git a/src/coreclr/pal/src/misc/sysinfo.cpp b/src/coreclr/pal/src/misc/sysinfo.cpp index eb679d3d24957c..b8840eff002191 100644 --- a/src/coreclr/pal/src/misc/sysinfo.cpp +++ b/src/coreclr/pal/src/misc/sysinfo.cpp @@ -539,23 +539,31 @@ ReadMemoryValueFromFile(const char* filename, uint64_t* val) return result; } +#define UPDATE_CACHE_SIZE_AND_LEVEL(CACHE_LEVEL) if (size > cacheSize) { cacheSize = size; cacheLevel = CACHE_LEVEL; } + size_t PALAPI PAL_GetLogicalProcessorCacheSizeFromOS() { + size_t cacheLevel = 0; size_t cacheSize = 0; + size_t size; #ifdef _SC_LEVEL1_DCACHE_SIZE - cacheSize = std::max(cacheSize, (size_t)sysconf(_SC_LEVEL1_DCACHE_SIZE)); + size = ( size_t) sysconf(_SC_LEVEL1_DCACHE_SIZE); + UPDATE_CACHE_SIZE_AND_LEVEL(1) #endif #ifdef _SC_LEVEL2_CACHE_SIZE - cacheSize = std::max(cacheSize, (size_t)sysconf(_SC_LEVEL2_CACHE_SIZE)); + size = ( size_t) sysconf(_SC_LEVEL2_CACHE_SIZE); + UPDATE_CACHE_SIZE_AND_LEVEL(2) #endif #ifdef _SC_LEVEL3_CACHE_SIZE - cacheSize = std::max(cacheSize, (size_t)sysconf(_SC_LEVEL3_CACHE_SIZE)); + size = ( size_t) sysconf(_SC_LEVEL3_CACHE_SIZE); + UPDATE_CACHE_SIZE_AND_LEVEL(3) #endif #ifdef _SC_LEVEL4_CACHE_SIZE - cacheSize = std::max(cacheSize, (size_t)sysconf(_SC_LEVEL4_CACHE_SIZE)); + size = ( size_t) sysconf(_SC_LEVEL4_CACHE_SIZE); + UPDATE_CACHE_SIZE_AND_LEVEL(4) #endif #if defined(TARGET_LINUX) && !defined(HOST_ARM) && !defined(HOST_X86) @@ -566,25 +574,39 @@ PAL_GetLogicalProcessorCacheSizeFromOS() // for the platform. Currently musl and arm64 should be only cases to use // this method to determine cache size. 
// - size_t size; - - if(ReadMemoryValueFromFile("/sys/devices/system/cpu/cpu0/cache/index0/size", &size)) - cacheSize = std::max(cacheSize, size); - if(ReadMemoryValueFromFile("/sys/devices/system/cpu/cpu0/cache/index1/size", &size)) - cacheSize = std::max(cacheSize, size); - if(ReadMemoryValueFromFile("/sys/devices/system/cpu/cpu0/cache/index2/size", &size)) - cacheSize = std::max(cacheSize, size); - if(ReadMemoryValueFromFile("/sys/devices/system/cpu/cpu0/cache/index3/size", &size)) - cacheSize = std::max(cacheSize, size); - if(ReadMemoryValueFromFile("/sys/devices/system/cpu/cpu0/cache/index4/size", &size)) - cacheSize = std::max(cacheSize, size); + size_t level; + char path_to_size_file[] = "/sys/devices/system/cpu/cpu0/cache/index-/size"; + char path_to_level_file[] = "/sys/devices/system/cpu/cpu0/cache/index-/level"; + int index = 40; + _ASSERTE(path_to_size_file[index] == '-'); + _ASSERTE(path_to_level_file[index] == '-'); + + for (int i = 0; i < 5; i++) + { + path_to_size_file[index] = (char)(48 + i); + + if (ReadMemoryValueFromFile(path_to_size_file, &size)) + { + path_to_level_file[index] = (char)(48 + i); + + if (ReadMemoryValueFromFile(path_to_level_file, &level)) + { + UPDATE_CACHE_SIZE_AND_LEVEL(level) + } + else + { + cacheSize = std::max(cacheSize, size); + } + } + } } #endif #if (defined(HOST_ARM64) || defined(HOST_LOONGARCH64)) && !defined(TARGET_OSX) if (cacheSize == 0) { - // It is currently expected to be missing cache size info + // We expect to get the L3 cache size for Arm64 but currently expected to be missing that info + // from most of the machines with exceptions on some machines. // // _SC_LEVEL*_*CACHE_SIZE is not yet present. 
Work is in progress to enable this for arm64 // @@ -621,12 +643,43 @@ PAL_GetLogicalProcessorCacheSizeFromOS() || sysctlbyname("hw.l3cachesize", &cacheSizeFromSysctl, &sz, nullptr, 0) == 0 || sysctlbyname("hw.l2cachesize", &cacheSizeFromSysctl, &sz, nullptr, 0) == 0 || sysctlbyname("hw.l1dcachesize", &cacheSizeFromSysctl, &sz, nullptr, 0) == 0; - if (success) { _ASSERTE(cacheSizeFromSysctl > 0); - cacheSize = (size_t) cacheSizeFromSysctl; + cacheSize = ( size_t) cacheSizeFromSysctl; + } + } +#endif + +#if (defined(HOST_ARM64) || defined(HOST_LOONGARCH64)) && !defined(TARGET_OSX) + if (cacheLevel != 3) + { + // We expect to get the L3 cache size for Arm64 but currently expected to be missing that info + // from most of the machines. + // Hence, just use the following heuristics at best depending on the CPU count + // 1 ~ 4 : 4 MB + // 5 ~ 16 : 8 MB + // 17 ~ 64 : 16 MB + // 65+ : 32 MB + DWORD logicalCPUs = PAL_GetLogicalCpuCountFromOS(); + if (logicalCPUs < 5) + { + cacheSize = 4; + } + else if (logicalCPUs < 17) + { + cacheSize = 8; } + else if (logicalCPUs < 65) + { + cacheSize = 16; + } + else + { + cacheSize = 32; + } + + cacheSize *= (1024 * 1024); } #endif diff --git a/src/coreclr/vm/gcenv.os.cpp b/src/coreclr/vm/gcenv.os.cpp index c2bb155208d486..23e90c6cda075f 100644 --- a/src/coreclr/vm/gcenv.os.cpp +++ b/src/coreclr/vm/gcenv.os.cpp @@ -588,6 +588,7 @@ SYSTEM_LOGICAL_PROCESSOR_INFORMATION *IsGLPISupported( PDWORD nEntries ) size_t GetLogicalProcessorCacheSizeFromOS() { size_t cache_size = 0; + size_t cache_level = 0; DWORD nEntries = 0; // Try to use GetLogicalProcessorInformation API and get a valid pointer to the SLPI array if successful. 
Returns NULL @@ -610,7 +611,11 @@ size_t GetLogicalProcessorCacheSizeFromOS() { if (pslpi[i].Relationship == RelationCache) { - last_cache_size = max(last_cache_size, pslpi[i].Cache.Size); + if (last_cache_size < pslpi[i].Cache.Size) + { + last_cache_size = pslpi[i].Cache.Size; + cache_level = pslpi[i].Cache.Level; + } } } cache_size = last_cache_size; @@ -620,6 +625,39 @@ size_t GetLogicalProcessorCacheSizeFromOS() if(pslpi) delete[] pslpi; // release the memory allocated for the SLPI array. +#if defined(TARGET_ARM64) + if (cache_level != 3) + { + uint32_t totalCPUCount = GCToOSInterface::GetTotalProcessorCount(); + + // We expect to get the L3 cache size for Arm64 but currently expected to be missing that info + // from most of the machines. + // Hence, just use the following heuristics at best depending on the CPU count + // 1 ~ 4 : 4 MB + // 5 ~ 16 : 8 MB + // 17 ~ 64 : 16 MB + // 65+ : 32 MB + if (totalCPUCount < 5) + { + cache_size = 4; + } + else if (totalCPUCount < 17) + { + cache_size = 8; + } + else if (totalCPUCount < 65) + { + cache_size = 16; + } + else + { + cache_size = 32; + } + + cache_size *= (1024 * 1024); + } +#endif // TARGET_ARM64 + return cache_size; } @@ -646,15 +684,10 @@ size_t GCToOSInterface::GetCacheSizePerLogicalCpu(bool trueSize) maxSize = maxTrueSize = GetLogicalProcessorCacheSizeFromOS() ; // Returns the size of the highest level processor cache -#if defined(TARGET_ARM64) - // Bigger gen0 size helps arm64 targets - maxSize = maxTrueSize * 3; -#endif - s_maxSize = maxSize; s_maxTrueSize = maxTrueSize; - // printf("GetCacheSizePerLogicalCpu returns %d, adjusted size %d\n", maxSize, maxTrueSize); + // printf("GetCacheSizePerLogicalCpu returns %zu, adjusted size %zu\n", maxSize, maxTrueSize); return trueSize ? maxTrueSize : maxSize; }