scx_mitosis: add l3 awareness and work stealing #2761
Changes from all commits: 1b11d2b, 0b7a1fc, 0c3c7bb, ba1924b, 0dd6be6, 8523b9d, 7ddaba0, 7639d21, 7972846, 73a8623, d0a7eed, 0b126e9
New file (@@ -0,0 +1,201 @@), the DSQ ID encoding header:

```c
/* Copyright (c) Meta Platforms, Inc. and affiliates. */
/*
 * This software may be used and distributed according to the terms of the
 * GNU General Public License version 2.
 *
 * This header defines the 64-bit dispatch queue (DSQ) ID encoding
 * scheme for scx_mitosis, using type fields to distinguish between
 * per-CPU and cell+L3 domain queues. It includes helper functions to
 * construct, validate, and parse these DSQ IDs for queue management.
 */
#pragma once

#include "intf.h"
#include "mitosis.bpf.h"

/*
 * ================================
 * BPF DSQ ID Layout (64 bits wide)
 * ================================
 *
 * Top-level format:
 * [63] [62..0]
 * [ B] [  ID ]
 *
 * If B == 1 -> Built-in DSQ
 * -------------------------
 * [63] [62] [61 .. 32] [31..0]
 * [ 1] [ L] [    R   ] [  V  ]
 *
 * - L (bit 62): LOCAL_ON flag
 *     If L == 1 -> V = CPU number
 * - R (30 bits): reserved / unused
 * - V (32 bits): value (e.g., CPU#)
 *
 * If B == 0 -> User-defined DSQ
 * -----------------------------
 * Only the low 32 bits are used.
 *
 * [63 .. 32]    [31..0]
 * [ 0][unused]  [ VAL ]
 *
 * Mitosis uses VAL as follows:
 *
 * [31..28] [27..0]
 * [QTYPE ] [DATA ]
 *
 * QTYPE encodes the queue type:
 *
 * QTYPE = 0x1 -> Per-CPU Q
 * [31..28]  [27 .. .. 0]
 * [ 0001 ]  [   CPU#   ]
 * [Q-TYPE:1]
 *
 * QTYPE = 0x2 -> Cell+L3 Q
 * [31..28]  [27 .. 16] [15 .. 0]
 * [ 0010 ]  [  CELL# ] [  L3ID ]
 * [Q-TYPE:2]
 */

/*
 * The use of these bitfields depends on compiler-defined byte and bit ordering.
 * Make sure we're only building with Clang/LLVM and that we're little-endian.
 */
#ifndef __clang__
#error "This code must be compiled with Clang/LLVM (eBPF: clang -target bpf)."
#endif

#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
#error "dsq64 bitfield layout assumes little-endian (bpfel)."
#endif

/* ---- Bitfield widths (bits) ---- */
#define CPU_B 28
#define L3_B 16
#define CELL_B 12
#define TYPE_B 4
#define DATA_B 28
#define RSVD_B 32

/* Sum checks (in bits) */
_Static_assert(CPU_B + TYPE_B == 32, "CPU layout low half must be 32 bits");
_Static_assert(L3_B + CELL_B + TYPE_B == 32,
	       "CELL+L3 layout low half must be 32 bits");
_Static_assert(DATA_B + TYPE_B == 32, "Common layout low half must be 32 bits");

typedef union {
	u64 raw;

	/* Per-CPU user DSQ */
	struct {
		u64 cpu : CPU_B;
		u64 type : TYPE_B;
		u64 rsvd : RSVD_B;
	} cpu_dsq;

	/* Cell+L3 user DSQ */
	struct {
		u64 l3 : L3_B;
		u64 cell : CELL_B;
		u64 type : TYPE_B;
		u64 rsvd : RSVD_B;
	} cell_l3_dsq;

	/* Generic user view */
	struct {
		u64 data : DATA_B;
		u64 type : TYPE_B;
		u64 rsvd : RSVD_B;
	} user_dsq;

	/* Built-in DSQ view */
	struct {
		u64 value : 32;
		u64 rsvd : 30;
		u64 local_on : 1;
		u64 builtin : 1;
	} builtin_dsq;

	/* NOTE: packed/aligned attributes were considered but are redundant here. */
} dsq_id_t;

/*
 * Invalid DSQ ID sentinel: invalid because bit 63 is clear (it's a user DSQ)
 * while its type is DSQ_TYPE_NONE. Good for catching uninitialized DSQ IDs.
 */
#define DSQ_INVALID ((u64)0)

_Static_assert(sizeof(((dsq_id_t){ 0 }).cpu_dsq) == sizeof(u64),
	       "cpu view must be 8 bytes");
_Static_assert(sizeof(((dsq_id_t){ 0 }).cell_l3_dsq) == sizeof(u64),
	       "cell+l3 view must be 8 bytes");
_Static_assert(sizeof(((dsq_id_t){ 0 }).user_dsq) == sizeof(u64),
	       "user common view must be 8 bytes");
_Static_assert(sizeof(((dsq_id_t){ 0 }).builtin_dsq) == sizeof(u64),
	       "builtin view must be 8 bytes");

/* Compile-time checks (in bytes) */
_Static_assert(sizeof(dsq_id_t) == sizeof(u64),
	       "dsq_id_t must be 8 bytes (64 bits)");
_Static_assert(_Alignof(dsq_id_t) == sizeof(u64),
	       "dsq_id_t must be 8-byte aligned");

/* DSQ type enumeration */
enum dsq_type {
	DSQ_TYPE_NONE,
	DSQ_TYPE_CPU,
	DSQ_TYPE_CELL_L3,
};

/* Range guards */
_Static_assert(MAX_CPUS <= (1u << CPU_B), "MAX_CPUS must fit in field");
_Static_assert(MAX_L3S <= (1u << L3_B), "MAX_L3S must fit in field");
_Static_assert(MAX_CELLS <= (1u << CELL_B), "MAX_CELLS must fit in field");
_Static_assert(DSQ_TYPE_CELL_L3 < (1u << TYPE_B),
	       "DSQ_TYPE_CELL_L3 must fit in field");

/*
 * Rather than propagate errors, these helpers bail via scx_bpf_error()
 * to force failures to surface early.
 */

static inline bool is_user_dsq(dsq_id_t dsq_id)
{
	return !dsq_id.builtin_dsq.builtin &&
	       dsq_id.user_dsq.type != DSQ_TYPE_NONE;
}

// Is this a per-CPU DSQ?
static inline bool is_cpu_dsq(dsq_id_t dsq_id)
{
	return is_user_dsq(dsq_id) && dsq_id.user_dsq.type == DSQ_TYPE_CPU;
}

// If this is a per-CPU DSQ, return the CPU.
static inline u32 get_cpu_from_dsq(dsq_id_t dsq_id)
{
	if (!is_cpu_dsq(dsq_id))
		scx_bpf_error("trying to get cpu from non-cpu dsq\n");

	return dsq_id.cpu_dsq.cpu;
}

/* Helper functions to construct DSQ IDs */
static inline dsq_id_t get_cpu_dsq_id(u32 cpu)
{
	// Check for valid CPU range; 0-indexed, so >=.
	if (cpu >= MAX_CPUS)
		scx_bpf_error("invalid cpu %u\n", cpu);

	return (dsq_id_t){ .cpu_dsq = { .cpu = cpu, .type = DSQ_TYPE_CPU } };
}

static inline dsq_id_t get_cell_l3_dsq_id(u32 cell, u32 l3)
{
	if (cell >= MAX_CELLS || l3 >= MAX_L3S)
		scx_bpf_error("cell %u or l3 %u too large\n", cell, l3);

	return (dsq_id_t){ .cell_l3_dsq = { .l3 = l3,
					    .cell = cell,
					    .type = DSQ_TYPE_CELL_L3 } };
}
```
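For orientation, here is a round-trip through the encoding above. This block is illustrative, not part of the PR; the values and the commented dispatch call are examples only.

```c
/* Example only (not from the PR): encode and decode DSQ IDs. */
static inline void dsq_id_example(void)
{
	/* Cell 3, L3 1: raw == (2 << 28) | (3 << 16) | 1 == 0x20030001 */
	dsq_id_t id = get_cell_l3_dsq_id(3, 1);
	u32 cell = id.cell_l3_dsq.cell; /* == 3 */
	u32 l3 = id.cell_l3_dsq.l3;	/* == 1 */

	/* CPU 5: raw == (1 << 28) | 5 == 0x10000005 */
	dsq_id_t cpu_q = get_cpu_dsq_id(5);
	bool per_cpu = is_cpu_dsq(cpu_q); /* true */

	/*
	 * The raw member is what kernel DSQ APIs consume, e.g.
	 * scx_bpf_dsq_insert(p, id.raw, slice_ns, enq_flags);
	 */
	(void)cell; (void)l3; (void)per_cpu;
}
```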
Changes to intf.h:

```c
@@ -5,7 +5,8 @@
#ifndef __INTF_H
#define __INTF_H

#ifndef __KERNEL__
#ifndef __BPF__
#include <stddef.h>
typedef unsigned long long u64;
typedef unsigned int u32;
typedef _Bool bool;

@@ -18,6 +19,10 @@ typedef _Bool bool;
#include <scx/ravg.bpf.h>
#endif

/* ---- Work stealing config (compile-time) ------------------------------- */
#define MITOSIS_ENABLE_STEALING 1
/* ----------------------------------------------------------------------- */
```

Review comment (on the compile-time flag): I think it might be best to have this as a runtime option, e.g. a flag passed to the user space binary that writes to a global static variable in the bpf code before running it.
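A minimal sketch of that suggestion, assuming the usual libbpf pattern of a read-only global written before load. The flag, call site, and skeleton names below are illustrative, not from this PR:

```c
/* BPF side: read-only knob, written by userspace before the program loads. */
const volatile bool enable_stealing = false;

/* At the dispatch site, gate stealing on the knob: */
/* if (enable_stealing) try_steal(); -- try_steal() is a placeholder name */

/*
 * Userspace side (C skeleton shown for illustration; scx_mitosis's
 * actual userspace is not shown in this diff):
 *
 *	struct scx_mitosis *skel = scx_mitosis__open();
 *	skel->rodata->enable_stealing = opts.stealing; // from a CLI flag
 *	scx_mitosis__load(skel);
 */
```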
```c
enum consts {
	CACHELINE_SIZE = 64,
	MAX_CPUS_SHIFT = 9,

@@ -28,8 +33,67 @@
	PCPU_BASE = 0x80000000,
	MAX_CG_DEPTH = 256,

	MAX_L3S = 16,
};

/* Kernel side sees the real lock; userspace sees padded bytes of the same size/alignment */
#if defined(__BPF__)
#define CELL_LOCK_T struct bpf_spin_lock
#else
/* Userspace placeholder: the kernel won't copy the spin lock out */
#define CELL_LOCK_T \
	struct {        \
		u32 __pad; \
	} /* 4-byte aligned as required */
#endif

struct cell {
	// A lock in the kernel, padding in userspace
	CELL_LOCK_T lock; // Assumed to be the first entry (see below)

	// Whether or not the cell is in use
	u32 in_use;

	// Number of CPUs in this cell
	u32 cpu_cnt;

	// Number of L3s with at least one CPU in this cell
	u32 l3_present_cnt;

	// Number of CPUs from each L3 assigned to this cell
	u32 l3_cpu_cnt[MAX_L3S];

	// Per-L3 vtimes within this cell
	u64 l3_vtime_now[MAX_L3S];
};
```
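On the BPF side, the leading lock serializes updates to a cell's bookkeeping. A minimal sketch of that pattern, assuming an array map of cells; the map and helper names are assumptions, not from this hunk:

```c
/* Illustrative only: serialize updates to a cell's counters. */
struct {
	__uint(type, BPF_MAP_TYPE_ARRAY);
	__uint(max_entries, MAX_CELLS);
	__type(key, u32);
	__type(value, struct cell);
} cells SEC(".maps");

static inline int cell_add_cpu_to_l3(u32 cell_idx, u32 l3)
{
	struct cell *c = bpf_map_lookup_elem(&cells, &cell_idx);

	/* Bound l3 explicitly so the verifier accepts the array access. */
	if (!c || l3 >= MAX_L3S)
		return -1;

	bpf_spin_lock(&c->lock);
	c->l3_cpu_cnt[l3]++;
	c->cpu_cnt++;
	bpf_spin_unlock(&c->lock);
	return 0;
}
```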
```c
// Putting the lock first in the struct is our convention.
// Rust code, which never sees the lock value, treats this space as padding.
// copy_cell_no_lock intentionally skips it to keep the verifier happy.
// BPF requires that the lock be 4-byte aligned.

// All assertions work for both BPF and userspace builds
_Static_assert(offsetof(struct cell, lock) == 0,
	       "lock/padding must be first field");

_Static_assert(sizeof(((struct cell *)0)->lock) == 4,
	       "lock/padding must be 4 bytes");

_Static_assert(_Alignof(CELL_LOCK_T) == 4,
	       "lock/padding must be 4-byte aligned");

_Static_assert(offsetof(struct cell, in_use) == 4,
	       "in_use must follow 4-byte lock/padding");

// Verify these are the same size in both BPF and Rust.
_Static_assert(sizeof(struct cell) ==
		       ((4 * sizeof(u32)) + (4 * MAX_L3S) + (8 * MAX_L3S)),
	       "struct cell size must be stable for Rust bindings");

_Static_assert(sizeof(struct cell) == 208,
	       "struct cell must be exactly 208 bytes");
```
```c
/* Statistics */
enum cell_stat_idx {
	CSTAT_LOCAL,

@@ -39,6 +103,14 @@ enum cell_stat_idx {
	NR_CSTATS,
};

/* Function invocation counters */
enum fn_counter_idx {
	COUNTER_SELECT_CPU,
	COUNTER_ENQUEUE,
	COUNTER_DISPATCH,
	NR_COUNTERS,
};
```
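For context, a plausible way these indices get used. The per-CPU map and helper below are assumptions; the hunk does not show where the PR actually stores the counters:

```c
/* Illustrative only: bump a per-CPU function-invocation counter. */
struct {
	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
	__uint(max_entries, 1);
	__type(key, u32);
	__type(value, u64[NR_COUNTERS]);
} fn_counters SEC(".maps");

static inline void count_fn(enum fn_counter_idx idx)
{
	u32 key = 0;
	u64 *ctrs = bpf_map_lookup_elem(&fn_counters, &key);

	if (ctrs && idx < NR_COUNTERS)
		ctrs[idx]++;
}
```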
```c
struct cpu_ctx {
	u64 cstats[MAX_CELLS][NR_CSTATS];
	u64 cell_cycles[MAX_CELLS];

@@ -51,14 +123,4 @@ struct cgrp_ctx {
	bool cell_owner;
};
```

The final hunk removes the old per-cell bookkeeping struct, which the new `struct cell` above supersedes:

```c
/*
 * cell is the per-cell book-keeping
 */
struct cell {
	// current vtime of the cell
	u64 vtime_now;
	// Whether or not the cell is in use
	u32 in_use;
};

#endif /* __INTF_H */
```
New file (@@ -0,0 +1,4 @@), a wrapper header for Rust bindgen:

```c
/* Force userspace path for Rust bindgen */
#undef __BPF__
#undef __bpf__
#include "intf.h"
```
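Pointing bindgen at this wrapper rather than at intf.h directly presumably forces the userspace branch of CELL_LOCK_T, so the generated Rust `struct cell` carries the 4-byte padding field and matches the 208-byte layout pinned down by the asserts above.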