Skip to content

Commit 1db5087

Browse files
huajsjhua jiang
authored andcommitted
[Runtime][ThreadPool]Refactor affinity function and support CPU affinity list setting. (apache#9802)
* [Runtime][ThreadPool] Refactor affinity function and support CPU affinity list setting. Issue: 1. There are multiple affinity functions that each use "LINUX" and "ANDROID" macro checks, and these repeated checks make the logic hard to maintain and change. 2. The current logic of tvm [Runtime][ThreadPool] assumes that all of the CPU resources are available to a single backend runtime for the data flow computation. That assumption may not hold when the user is running multiple tasks on the system and does not want the tvm task to exhaust all of the CPU resources, or when the user is going to run multiple tvm backend runtimes on the system, in which case each backend runtime should use a different CPU affinity setting to achieve the best performance. Solution: 1. Refactor the affinity functions to move the "LINUX" and "ANDROID" checks into one function. 2. In this solution, we introduce a new "CPU AffinityMode type" named "kSpecify". By using "kSpecify" and the function named "tvm::runtime::threading::Configure", the user can specify the CPU list for the CPU affinity of a backend runtime. This solution reuses the existing per-thread thread pool logic of [Runtime][ThreadPool], which creates a worker thread pool for the current thread that can run a particular runtime. For a multiple-runtime use case, the user can first launch multiple threads, then call "tvm::runtime::threading::Configure" with a CPU list to create the tvm data flow worker thread pool; after doing this, the execution of the multiple runtimes on the multiple threads will use different CPU resource lists. * fix windows build issue. * fix build issue. * fix build issue. * fix windows build issue. * fix plint issue * polish comments. * address review comments. * address review comments. * address review comments. * address review comments. Co-authored-by: hua jiang <[email protected]>
1 parent dbce603 commit 1db5087

File tree

4 files changed

+353
-114
lines changed

4 files changed

+353
-114
lines changed

include/tvm/runtime/threading_backend.h

Lines changed: 43 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,27 @@
2828
#include <memory>
2929
#include <vector>
3030

31+
#if defined(__linux__) || defined(__ANDROID__)
#if defined(__ANDROID__)
#ifndef CPU_SET
/*
 * Fallback CPU-set shim, compiled only on Android when the toolchain headers
 * have not already defined CPU_SET. It provides a fixed-size bit set of
 * CPU_SETSIZE bits stored in 64-bit words, plus the macros used to edit it.
 */
#define CPU_SETSIZE 1024
#define __NCPUBITS (8 * sizeof(uint64_t))
typedef struct {
  uint64_t __bits[CPU_SETSIZE / __NCPUBITS];
} cpu_set_t;

/*
 * The mask is built from a 64-bit one: the shift count ranges over 0..63, and
 * shifting a 32-bit `unsigned long` (32-bit ABIs) by 32 or more is undefined.
 */
#define CPU_SET(cpu, cpusetp) \
  ((cpusetp)->__bits[(cpu) / __NCPUBITS] |= (static_cast<uint64_t>(1) << ((cpu) % __NCPUBITS)))
#define CPU_ZERO(cpusetp) memset((cpusetp), 0, sizeof(cpu_set_t))
/* Fully parenthesized so the macro can be negated or combined in a larger expression. */
#define CPU_ISSET(cpu, cpusetp)               \
  ((((cpusetp)->__bits[(cpu) / __NCPUBITS]) & \
    (static_cast<uint64_t>(1) << ((cpu) % __NCPUBITS))) != 0)
/* Takes the two sets by value (not by pointer) and compares their raw bytes. */
#define CPU_EQUAL(left, right) (memcmp(&(left), &(right), sizeof(cpu_set_t)) == 0)

#endif
#endif
#endif
51+
3152
namespace tvm {
3253
namespace runtime {
3354
namespace threading {
@@ -64,21 +85,26 @@ class ThreadGroup {
6485
/*! \brief CPU affinity modes accepted by Configure() as its `mode` argument. */
enum AffinityMode : int {
6586
kBig = 1,
6687
kLittle = -1,
88+
/*! \brief Different threads will get different affinities. */
89+
kSpecifyOneCorePerThread = -2,
90+
/*! \brief All threads will get the same core group affinity. */
91+
kSpecifyThreadShareAllCore = -3,
6792
};
68-
6993
/*!
7094
* \brief configure the CPU id affinity
7195
*
72-
* \param mode The preferred CPU type (1 = big, -1 = little).
96+
* \param mode The preferred CPU type (1 = big, -1 = little ...).
7397
* \param nthreads The number of threads to use (0 = use all).
7498
* \param exclude_worker0 Whether to use the main thread as a worker.
7599
* If `true`, worker0 will not be launched in a new thread and
76100
* `worker_callback` will only be called for values >= 1. This
77101
* allows use of the main thread as a worker.
102+
* \param cpus A list of CPU ids used to set the CPU affinity.
78103
*
79104
* \return The number of workers to use.
80105
*/
81-
int Configure(AffinityMode mode, int nthreads, bool exclude_worker0);
106+
int Configure(AffinityMode mode, int nthreads, bool exclude_worker0,
107+
std::vector<unsigned int> cpus = {});
82108

83109
private:
84110
Impl* impl_;
@@ -88,12 +114,14 @@ class ThreadGroup {
88114
* \brief Platform-agnostic no-op.
89115
*/
90116
void Yield();
91-
92117
/*!
93118
* \return the maximum number of effective workers for this system.
94119
*/
95120
int MaxConcurrency();
96-
121+
/*!
122+
* \brief Set the maximum number of available cores.
123+
*/
124+
void SetMaxConcurrency(int value);
97125
/*!
98126
* \brief Reset the threads in the pool. All current threads are destroyed and
99127
* new ones are created.
@@ -102,6 +130,16 @@ int MaxConcurrency();
102130
*/
103131
void ResetThreadPool();
104132

133+
/*!
134+
* \brief Configure the CPU affinity mode for the worker threads.
135+
* \param mode The preferred CPU type (1 = big, -1 = little, -2 = kSpecifyOneCorePerThread,
136+
* -3 = kSpecifyThreadShareAllCore).
137+
* \param nthreads The number of threads to use (0 = use all).
138+
* \param cpus A list of CPU ids used to set the CPU affinity of the worker threads.
139+
*/
140+
void Configure(tvm::runtime::threading::ThreadGroup::AffinityMode mode, int nthreads,
141+
std::vector<unsigned int> cpus);
142+
105143
} // namespace threading
106144
} // namespace runtime
107145
} // namespace tvm

src/runtime/thread_pool.cc

Lines changed: 33 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include <dmlc/thread_local.h>
2525
#include <tvm/runtime/c_backend_api.h>
2626
#include <tvm/runtime/c_runtime_api.h>
27+
#include <tvm/runtime/container/array.h>
2728
#include <tvm/runtime/logging.h>
2829
#include <tvm/runtime/packed_func.h>
2930
#include <tvm/runtime/registry.h>
@@ -42,12 +43,13 @@
4243
#include <thread>
4344
#include <vector>
4445

46+
#include "../support/utils.h"
4547
const constexpr int kL1CacheBytes = 64;
4648

4749
namespace tvm {
4850
namespace runtime {
4951
namespace {
50-
52+
using support::IsNumber;
5153
constexpr uint32_t kDefaultSpinCount = 300000;
5254

5355
uint32_t GetSpinCount() {
@@ -317,10 +319,11 @@ class ThreadPool {
317319

318320
/*! \brief Fetch the ThreadPool instance held by dmlc::ThreadLocalStore<ThreadPool>. */
static ThreadPool* ThreadLocal() {
  ThreadPool* pool = dmlc::ThreadLocalStore<ThreadPool>::Get();
  return pool;
}
319321

320-
void UpdateWorkerConfiguration(threading::ThreadGroup::AffinityMode mode, int nthreads) {
322+
void UpdateWorkerConfiguration(threading::ThreadGroup::AffinityMode mode, int nthreads,
323+
const std::vector<unsigned int>& cpus) {
321324
// this will also reset the affinity of the ThreadGroup
322325
// may use less than the MaxConcurrency number of workers
323-
num_workers_used_ = threads_->Configure(mode, nthreads, exclude_worker0_);
326+
num_workers_used_ = threads_->Configure(mode, nthreads, exclude_worker0_, cpus);
324327
// if MaxConcurrency restricted the number of workers (e.g., due to
325328
// hyperthreading), respect the restriction
326329
num_workers_used_ = std::min(num_workers_, num_workers_used_);
@@ -369,17 +372,42 @@ class ThreadPool {
369372
std::unique_ptr<tvm::runtime::threading::ThreadGroup> threads_;
370373
};
371374

375+
/*!
376+
* \brief args[0] is the AffinityMode, args[1] is the number of threads.
377+
* args2 is a list of CPUs which is used to set the CPU affinity.
378+
*/
372379
TVM_REGISTER_GLOBAL("runtime.config_threadpool").set_body([](TVMArgs args, TVMRetValue* rv) {
373380
threading::ThreadGroup::AffinityMode mode =
374381
static_cast<threading::ThreadGroup::AffinityMode>(static_cast<int>(args[0]));
375382
int nthreads = args[1];
376-
ThreadPool::ThreadLocal()->UpdateWorkerConfiguration(mode, nthreads);
383+
std::vector<unsigned int> cpus;
384+
if (args.num_args >= 3) {
385+
Array<String> cpu_array = args[2];
386+
for (auto cpu : cpu_array) {
387+
ICHECK(IsNumber(cpu)) << "The CPU core information '" << cpu << "' is not a number.";
388+
cpus.push_back(std::stoi(cpu));
389+
std::cout << "cpu is " << cpu << std::endl;
390+
}
391+
}
392+
threading::Configure(mode, nthreads, cpus);
377393
});
378394

379395
namespace threading {
380396
void ResetThreadPool() { tvm::runtime::ThreadPool::ThreadLocal()->Reset(); }
397+
/*!
398+
* \brief configure the CPU id affinity
399+
* \param mode The preferred CPU type (1 = big, -1 = little, -2 = specify ,
400+
* -3 = kSpecifyOneCorePerThread, -3 = kSpecifyThreadShareAllCore).
401+
* \param nthreads The number of threads to use (0 = use all).
402+
* \param cpus cpus A list of CPUs is used to set the 'cpu affinity' for the worker threads.
403+
*
404+
*/
405+
void Configure(tvm::runtime::threading::ThreadGroup::AffinityMode mode, int nthreads,
406+
std::vector<unsigned int> cpus) {
407+
tvm::runtime::threading::SetMaxConcurrency(cpus.size());
408+
tvm::runtime::ThreadPool::ThreadLocal()->UpdateWorkerConfiguration(mode, nthreads, cpus);
409+
}
381410
} // namespace threading
382-
383411
} // namespace runtime
384412
} // namespace tvm
385413

0 commit comments

Comments
 (0)