diff --git a/src/libumf.c b/src/libumf.c index 9df7ee29cd..4ccde9bb4f 100644 --- a/src/libumf.c +++ b/src/libumf.c @@ -40,6 +40,13 @@ static umf_ctl_node_t CTL_NODE(umf)[] = {CTL_CHILD(provider), CTL_CHILD(pool), void initialize_global_ctl(void) { CTL_REGISTER_MODULE(NULL, umf); } +// Benchmarks may fork multiple times and topology init is slow. +// Init topology before fork (if not already) so children don't repeat it. +// TODO: This is a hack. Better solution is needed. +#if !defined(_WIN32) && !defined(UMF_NO_HWLOC) +static void atfork_prepare(void) { umfGetTopologyReduced(); } +#endif + umf_result_t umfInit(void) { utils_init_once(&initMutexOnce, initialize_init_mutex); @@ -74,11 +81,11 @@ umf_result_t umfInit(void) { if (TRACKER) { LOG_DEBUG("UMF library initialized"); } -#if !defined(UMF_NO_HWLOC) - // some benchmarks uses multiple forks, and topology initialization is very slow - // so if we initialize topology before the first fork, we can get significant performance gain. - umfGetTopologyReduced(); + +#if !defined(_WIN32) && !defined(UMF_NO_HWLOC) + pthread_atfork(atfork_prepare, NULL, NULL); #endif + return UMF_RESULT_SUCCESS; }