diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h
index db1aeacd4cd99..8c0a36f72d6fc 100644
--- a/arch/arm64/include/asm/insn.h
+++ b/arch/arm64/include/asm/insn.h
@@ -135,6 +135,12 @@ enum aarch64_insn_special_register {
 	AARCH64_INSN_SPCLREG_SP_EL2	= 0xF210
 };
 
+enum aarch64_insn_system_register {
+	AARCH64_INSN_SYSREG_TPIDR_EL1	= 0x4684,
+	AARCH64_INSN_SYSREG_TPIDR_EL2	= 0x6682,
+	AARCH64_INSN_SYSREG_SP_EL0	= 0x4208,
+};
+
 enum aarch64_insn_variant {
 	AARCH64_INSN_VARIANT_32BIT,
 	AARCH64_INSN_VARIANT_64BIT
@@ -686,6 +692,8 @@ u32 aarch64_insn_gen_cas(enum aarch64_insn_register result,
 }
 #endif
 u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type);
+u32 aarch64_insn_gen_mrs(enum aarch64_insn_register result,
+			 enum aarch64_insn_system_register sysreg);
 s32 aarch64_get_branch_offset(u32 insn);
 u32 aarch64_set_branch_offset(u32 insn, s32 offset);
 
diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c
index a635ab83fee35..b008a9b46a7ff 100644
--- a/arch/arm64/lib/insn.c
+++ b/arch/arm64/lib/insn.c
@@ -1515,3 +1515,14 @@ u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type)
 
 	return insn;
 }
+
+u32 aarch64_insn_gen_mrs(enum aarch64_insn_register result,
+			 enum aarch64_insn_system_register sysreg)
+{
+	u32 insn = aarch64_insn_get_mrs_value();
+
+	insn &= ~GENMASK(19, 0);
+	insn |= sysreg << 5;
+	return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT,
+					    insn, result);
+}
diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h
index 23b1b34db088e..b22ab2f97a300 100644
--- a/arch/arm64/net/bpf_jit.h
+++ b/arch/arm64/net/bpf_jit.h
@@ -297,4 +297,12 @@
 #define A64_ADR(Rd, offset) \
 	aarch64_insn_gen_adr(0, offset, Rd, AARCH64_INSN_ADR_TYPE_ADR)
 
+/* MRS */
+#define A64_MRS_TPIDR_EL1(Rt) \
+	aarch64_insn_gen_mrs(Rt, AARCH64_INSN_SYSREG_TPIDR_EL1)
+#define A64_MRS_TPIDR_EL2(Rt) \
+	aarch64_insn_gen_mrs(Rt, AARCH64_INSN_SYSREG_TPIDR_EL2)
+#define A64_MRS_SP_EL0(Rt) \
+	aarch64_insn_gen_mrs(Rt, AARCH64_INSN_SYSREG_SP_EL0)
+
 #endif /* _BPF_JIT_H */
diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
index 76b91f36c729c..8084f3e61e0b4 100644
--- a/arch/arm64/net/bpf_jit_comp.c
+++ b/arch/arm64/net/bpf_jit_comp.c
@@ -877,6 +877,15 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 			emit(A64_ORR(1, tmp, dst, tmp), ctx);
 			emit(A64_MOV(1, dst, tmp), ctx);
 			break;
+		} else if (insn_is_mov_percpu_addr(insn)) {
+			if (dst != src)
+				emit(A64_MOV(1, dst, src), ctx);
+			if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
+				emit(A64_MRS_TPIDR_EL2(tmp), ctx);
+			else
+				emit(A64_MRS_TPIDR_EL1(tmp), ctx);
+			emit(A64_ADD(1, dst, dst, tmp), ctx);
+			break;
 		}
 		switch (insn->off) {
 		case 0:
@@ -1206,6 +1215,19 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
 		const u8 r0 = bpf2a64[BPF_REG_0];
 		bool func_addr_fixed;
 		u64 func_addr;
+		u32 cpu_offset = offsetof(struct thread_info, cpu);
+
+		/* Implement helper call to bpf_get_smp_processor_id() inline */
+		if (insn->src_reg == 0 && insn->imm == BPF_FUNC_get_smp_processor_id) {
+			emit(A64_MRS_SP_EL0(tmp), ctx);
+			if (is_lsi_offset(cpu_offset, 2)) {
+				emit(A64_LDR32I(r0, tmp, cpu_offset), ctx);
+			} else {
+				emit_a64_mov_i(1, tmp2, cpu_offset, ctx);
+				emit(A64_LDR32(r0, tmp, tmp2), ctx);
+			}
+			break;
+		}
 
 		ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
 					    &func_addr, &func_addr_fixed);
@@ -2527,6 +2549,21 @@ bool bpf_jit_supports_arena(void)
 	return true;
 }
 
+bool bpf_jit_supports_percpu_insn(void)
+{
+	return true;
+}
+
+bool bpf_jit_inlines_helper_call(s32 imm)
+{
+	switch (imm) {
+	case BPF_FUNC_get_smp_processor_id:
+		return true;
+	}
+
+	return false;
+}
+
 void bpf_jit_free(struct bpf_prog *prog)
{
 	if (prog->jited) {
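
For reference, the three new AARCH64_INSN_SYSREG_* values are the 15-bit o0:op1:CRn:CRm:op2 system-register encodings that aarch64_insn_gen_mrs() shifts into bits [19:5] of the MRS opcode, with the single o0 bit encoded as op0 - 2. A standalone sketch (illustrative only, not part of the patch) that reproduces the constants from the architectural register numbers:

/* Illustrative only -- rebuilds the sysreg field that aarch64_insn_gen_mrs()
 * places at bits [19:5] of an MRS instruction.  All three registers use
 * op0 == 3, so the o0 bit is (op0 - 2) == 1.
 */
#include <stdio.h>

static unsigned int sysreg_enc(unsigned int op0, unsigned int op1,
			       unsigned int crn, unsigned int crm,
			       unsigned int op2)
{
	return ((op0 - 2) << 14) | (op1 << 11) | (crn << 7) | (crm << 3) | op2;
}

int main(void)
{
	printf("TPIDR_EL1 = 0x%x\n", sysreg_enc(3, 0, 13, 0, 4));	/* 0x4684 */
	printf("TPIDR_EL2 = 0x%x\n", sysreg_enc(3, 4, 13, 0, 2));	/* 0x6682 */
	printf("SP_EL0    = 0x%x\n", sysreg_enc(3, 0, 4, 1, 0));	/* 0x4208 */
	return 0;
}

TPIDR_EL1/TPIDR_EL2 hold the per-CPU offset that the new insn_is_mov_percpu_addr() case adds to the destination register (TPIDR_EL2 when running under VHE, hence the ARM64_HAS_VIRT_HOST_EXTN check), while SP_EL0 points at the current task, whose thread_info sits at offset 0, which is where the inlined bpf_get_smp_processor_id() loads the cpu field from.
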
diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index 15e482f2c6572..5789b7afae47b 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include
 #include "bpf_jit.h"
 
 #define RV_FENTRY_NINSNS 2
@@ -1089,6 +1090,24 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
 			emit_or(RV_REG_T1, rd, RV_REG_T1, ctx);
 			emit_mv(rd, RV_REG_T1, ctx);
 			break;
+		} else if (insn_is_mov_percpu_addr(insn)) {
+			if (rd != rs)
+				emit_mv(rd, rs, ctx);
+#ifdef CONFIG_SMP
+			/* Load current CPU number in T1 */
+			emit_ld(RV_REG_T1, offsetof(struct thread_info, cpu),
+				RV_REG_TP, ctx);
+			/* << 3 because offsets are 8 bytes */
+			emit_slli(RV_REG_T1, RV_REG_T1, 3, ctx);
+			/* Load address of __per_cpu_offset array in T2 */
+			emit_addr(RV_REG_T2, (u64)&__per_cpu_offset, extra_pass, ctx);
+			/* Add offset of current CPU to __per_cpu_offset */
+			emit_add(RV_REG_T1, RV_REG_T2, RV_REG_T1, ctx);
+			/* Load __per_cpu_offset[cpu] in T1 */
+			emit_ld(RV_REG_T1, 0, RV_REG_T1, ctx);
+			/* Add the offset to Rd */
+			emit_add(rd, rd, RV_REG_T1, ctx);
+#endif
 		}
 		if (imm == 1) {
 			/* Special mov32 for zext */
@@ -1474,6 +1493,22 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
 		bool fixed_addr;
 		u64 addr;
 
+		/* Inline calls to bpf_get_smp_processor_id()
+		 *
+		 * RV_REG_TP holds the address of the current CPU's task_struct and thread_info is
+		 * at offset 0 in task_struct.
+		 * Load cpu from thread_info:
+		 *     Set R0 to ((struct thread_info *)(RV_REG_TP))->cpu
+		 *
+		 * This replicates the implementation of raw_smp_processor_id() on RISCV
+		 */
+		if (insn->src_reg == 0 && insn->imm == BPF_FUNC_get_smp_processor_id) {
+			/* Load current CPU number in R0 */
+			emit_ld(bpf_to_rv_reg(BPF_REG_0, ctx), offsetof(struct thread_info, cpu),
+				RV_REG_TP, ctx);
+			break;
+		}
+
 		mark_call(ctx);
 		ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass,
 					    &addr, &fixed_addr);
@@ -2038,3 +2073,18 @@ bool bpf_jit_supports_arena(void)
 {
 	return true;
 }
+
+bool bpf_jit_supports_percpu_insn(void)
+{
+	return true;
+}
+
+bool bpf_jit_inlines_helper_call(s32 imm)
+{
+	switch (imm) {
+	case BPF_FUNC_get_smp_processor_id:
+		return true;
+	}
+
+	return false;
+}
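
To make the RISC-V sequences above easier to follow: the per-CPU address case rewrites rd into this CPU's copy of a per-CPU base address by adding __per_cpu_offset[cpu], with cpu read from thread_info through the tp register, and the call case reduces bpf_get_smp_processor_id() to that same thread_info load. A minimal standalone model of the address math (every name below is a mock stand-in, not a kernel symbol):

/* Illustrative only -- models what the emitted sequence computes:
 * ptr_for_this_cpu = base + __per_cpu_offset[thread_info->cpu].
 */
#include <stdio.h>
#include <stdint.h>

struct mock_thread_info { int cpu; };	/* cpu at a fixed, known offset */

static const uintptr_t mock_per_cpu_offset[4] = { 0x1000, 0x2000, 0x3000, 0x4000 };

static uintptr_t percpu_address(uintptr_t base, const struct mock_thread_info *ti)
{
	/* load cpu, index the offset table (the JIT scales the index by 8), add to base */
	return base + mock_per_cpu_offset[ti->cpu];
}

int main(void)
{
	struct mock_thread_info ti = { .cpu = 2 };

	printf("this CPU's copy lives at %#lx\n",
	       (unsigned long)percpu_address(0x100, &ti));
	return 0;
}
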
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 7a27f19bf44d0..3e19bb62ed1ad 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -993,6 +993,7 @@ u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog);
 void bpf_jit_compile(struct bpf_prog *prog);
 bool bpf_jit_needs_zext(void);
+bool bpf_jit_inlines_helper_call(s32 imm);
 bool bpf_jit_supports_subprog_tailcalls(void);
 bool bpf_jit_supports_percpu_insn(void);
 bool bpf_jit_supports_kfunc_call(void);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 99b8b1c9a248c..aa59af9f9bd9b 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2941,6 +2941,17 @@ bool __weak bpf_jit_needs_zext(void)
 	return false;
 }
 
+/* Return true if the JIT inlines the call to the helper corresponding to
+ * the imm.
+ *
+ * The verifier will not patch the insn->imm for the call to the helper if
+ * this returns true.
+ */
+bool __weak bpf_jit_inlines_helper_call(s32 imm)
+{
+	return false;
+}
+
 /* Return TRUE if the JIT backend supports mixing bpf2bpf and tailcalls.
  */
 bool __weak bpf_jit_supports_subprog_tailcalls(void)
 {
 	return false;
 }
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 7360f04f9ec7b..385111b07a815 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -20020,6 +20020,8 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
 			goto next_insn;
 		}
 
+		if (bpf_jit_inlines_helper_call(insn->imm))
+			goto next_insn;
 		if (insn->imm == BPF_FUNC_get_route_realm)
 			prog->dst_needed = 1;
 		if (insn->imm == BPF_FUNC_get_prandom_u32)
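
The contract between the generic and arch-specific halves is worth spelling out: once the new weak hook reports that a helper is inlined by the JIT, do_misc_fixups() leaves the call instruction untouched, so the JIT still sees src_reg == 0 and imm holding the helper ID rather than a patched address offset, which is exactly what the arm64 and RISC-V BPF_JMP | BPF_CALL cases above test for. A minimal sketch of that unpatched call, assuming the BPF_RAW_INSN() macro from include/linux/filter.h:

/* Illustrative only -- the helper call as the JIT receives it when the
 * verifier skips patching: src_reg == 0 marks a helper call (as opposed to
 * a kfunc or bpf2bpf call) and imm is still BPF_FUNC_get_smp_processor_id.
 */
const struct bpf_insn unpatched_call =
	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_smp_processor_id);
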
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 627b74ae041b5..b6706951b8d4f 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -512,6 +512,10 @@ extern const struct bench bench_trig_fmodret;
 extern const struct bench bench_trig_tp;
 extern const struct bench bench_trig_rawtp;
 
+extern const struct bench bench_trig_arr_inc;
+extern const struct bench bench_trig_hash_inc;
+extern const struct bench bench_trig_glob_arr_inc;
+
 /* uprobe/uretprobe benchmarks */
 extern const struct bench bench_trig_uprobe_nop;
 extern const struct bench bench_trig_uretprobe_nop;
@@ -566,6 +570,11 @@ static const struct bench *benchs[] = {
 	&bench_trig_fmodret,
 	&bench_trig_tp,
 	&bench_trig_rawtp,
+
+	&bench_trig_arr_inc,
+	&bench_trig_hash_inc,
+	&bench_trig_glob_arr_inc,
+
 	/* uprobes */
 	&bench_trig_uprobe_nop,
 	&bench_trig_uretprobe_nop,
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index 4b05539f167df..201dcea5dcd3f 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -225,6 +225,33 @@ static void trigger_fentry_setup(void)
 	attach_bpf(ctx.skel->progs.bench_trigger_fentry);
 }
 
+static void trigger_arr_inc_setup(void)
+{
+	setup_ctx();
+	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
+	bpf_program__set_autoload(ctx.skel->progs.trigger_arr_inc, true);
+	load_ctx();
+	ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_arr_inc);
+}
+
+static void trigger_hash_inc_setup(void)
+{
+	setup_ctx();
+	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
+	bpf_program__set_autoload(ctx.skel->progs.trigger_hash_inc, true);
+	load_ctx();
+	ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_hash_inc);
+}
+
+static void trigger_glob_arr_inc_setup(void)
+{
+	setup_ctx();
+	bpf_program__set_autoload(ctx.skel->progs.trigger_driver, false);
+	bpf_program__set_autoload(ctx.skel->progs.trigger_glob_arr_inc, true);
+	load_ctx();
+	ctx.driver_prog_fd = bpf_program__fd(ctx.skel->progs.trigger_glob_arr_inc);
+}
+
 static void trigger_fexit_setup(void)
 {
 	setup_ctx();
@@ -435,6 +462,10 @@ BENCH_TRIG_KERNEL(fmodret, "fmodret");
 BENCH_TRIG_KERNEL(tp, "tp");
 BENCH_TRIG_KERNEL(rawtp, "rawtp");
 
+BENCH_TRIG_KERNEL(arr_inc, "arr-inc");
+BENCH_TRIG_KERNEL(hash_inc, "hash-inc");
+BENCH_TRIG_KERNEL(glob_arr_inc, "glob-arr-inc");
+
 /* uprobe benchmarks */
 #define BENCH_TRIG_USERMODE(KIND, PRODUCER, NAME) \
 const struct bench bench_trig_##KIND = { \
diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
index 2619ed193c65a..cf3b4bf548013 100644
--- a/tools/testing/selftests/bpf/progs/trigger_bench.c
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -25,6 +25,50 @@ static __always_inline void inc_counter(void)
 	__sync_add_and_fetch(&hits[cpu & CPU_MASK].value, 1);
 }
 
+static __always_inline void inc_counter2(int amount)
+{
+	int cpu = bpf_get_smp_processor_id();
+
+	__sync_add_and_fetch(&hits[cpu & CPU_MASK].value, amount);
+}
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+	__type(key, int);
+	__type(value, int);
+	__uint(max_entries, 1);
+} hash_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__type(key, int);
+	__type(value, int);
+	__uint(max_entries, 1);
+} array_map SEC(".maps");
+
+static int zero = 0;
+
+static void __always_inline hash_inc(void *map) {
+	int *p;
+
+	p = bpf_map_lookup_elem(map, &zero);
+	if (!p) {
+		bpf_map_update_elem(map, &zero, &zero, BPF_ANY);
+		p = bpf_map_lookup_elem(map, &zero);
+		if (!p)
+			return;
+	}
+	*p += 1;
+}
+
+struct counter arr[256];
+
+static void __always_inline glob_arr_inc(void) {
+	int cpu = bpf_get_smp_processor_id();
+
+	arr[cpu].value += 1;
+}
+
 SEC("?uprobe")
 int bench_trigger_uprobe(void *ctx)
 {
@@ -34,6 +78,45 @@ int bench_trigger_uprobe(void *ctx)
 
 const volatile int batch_iters = 0;
 
+SEC("?raw_tp")
+int trigger_arr_inc(void *ctx)
+{
+	int i;
+
+	for (i = 0; i < batch_iters; i++)
+		hash_inc(&array_map);
+
+	inc_counter2(batch_iters);
+
+	return 0;
+}
+
+SEC("?raw_tp")
+int trigger_hash_inc(void *ctx)
+{
+	int i;
+
+	for (i = 0; i < batch_iters; i++)
+		hash_inc(&hash_map);
+
+	inc_counter2(batch_iters);
+
+	return 0;
+}
+
+SEC("?raw_tp")
+int trigger_glob_arr_inc(void *ctx)
+{
+	int i;
+
+	for (i = 0; i < batch_iters; i++)
+		glob_arr_inc();
+
+	inc_counter2(batch_iters);
+
+	return 0;
+}
+
 SEC("?raw_tp")
 int trigger_count(void *ctx)
 {
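
With the selftest pieces above in place, the three new workloads should be selectable through the existing trigger benchmark front end under the trig-<name> scheme that BENCH_TRIG_KERNEL() generates, i.e. presumably ./bench trig-arr-inc, trig-hash-inc and trig-glob-arr-inc from tools/testing/selftests/bpf (the invocation is inferred from the macro naming here, not taken from the patch itself). Each program performs batch_iters per-CPU updates per trigger and then reports via inc_counter2(batch_iters), so the hits counters measure individual per-CPU updates rather than trigger invocations.
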