|
| 1 | +From 8c89b5db7a2894e33417dd69680729e8f65f5709 Mon Sep 17 00:00:00 2001 |
| 2 | +From: Yonghong Song < [email protected]> |
| 3 | +Date: Mon, 18 Apr 2022 21:32:30 -0700 |
| 4 | +Subject: [PATCH] selftests/bpf: Limit unroll_count for pyperf600 test |
| 5 | + |
| 6 | +LLVM commit [1] changed loop pragma behavior such that |
| 7 | +full loop unroll is always honored with user pragma. |
| 8 | +Previously, the unroll count also depended on the unrolled |
| 9 | +code size. For pyperf600, without [1], the loop unroll |
| 10 | +count is 150. With [1], the loop unroll count is 600. |
| 11 | + |
| 12 | +The unroll count of 600 pushed the program size close to |
| 13 | +298k insns, which caused the following code to be generated: |
| 14 | + 0: 7b 1a 00 ff 00 00 00 00 *(u64 *)(r10 - 256) = r1 |
| 15 | + ; uint64_t pid_tgid = bpf_get_current_pid_tgid(); |
| 16 | + 1: 85 00 00 00 0e 00 00 00 call 14 |
| 17 | + 2: bf 06 00 00 00 00 00 00 r6 = r0 |
| 18 | + ; pid_t pid = (pid_t)(pid_tgid >> 32); |
| 19 | + 3: bf 61 00 00 00 00 00 00 r1 = r6 |
| 20 | + 4: 77 01 00 00 20 00 00 00 r1 >>= 32 |
| 21 | + 5: 63 1a fc ff 00 00 00 00 *(u32 *)(r10 - 4) = r1 |
| 22 | + 6: bf a2 00 00 00 00 00 00 r2 = r10 |
| 23 | + 7: 07 02 00 00 fc ff ff ff r2 += -4 |
| 24 | + ; PidData* pidData = bpf_map_lookup_elem(&pidmap, &pid); |
| 25 | + 8: 18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0 ll |
| 26 | + 10: 85 00 00 00 01 00 00 00 call 1 |
| 27 | + 11: bf 08 00 00 00 00 00 00 r8 = r0 |
| 28 | + ; if (!pidData) |
| 29 | + 12: 15 08 15 e8 00 00 00 00 if r8 == 0 goto -6123 <LBB0_27588+0xffffffffffdae100> |
| 30 | + |
| 31 | +Note that insn 12 has a branch offset of -6123, which is clearly illegal |
| 32 | +and will be rejected by the verifier. The negative offset arises because |
| 33 | +the branch range is greater than INT16_MAX. |
| 34 | + |
| 35 | +This patch changes the unroll count to 150 to avoid the above |
| 36 | +branch target insn out-of-range issue. Also, llvm is enhanced ([2]) |
| 37 | +to assert if the branch target insn is out of INT16 range. |
| 38 | + |
| 39 | + [1] https://reviews.llvm.org/D119148 |
| 40 | + [2] https://reviews.llvm.org/D123877 |
| 41 | + |
| 42 | +Signed-off-by: Yonghong Song < [email protected]> |
| 43 | +Signed-off-by: Alexei Starovoitov < [email protected]> |
| 44 | +Link: https://lore.kernel.org/bpf/ [email protected] |
| 45 | +--- |
| 46 | + tools/testing/selftests/bpf/progs/pyperf.h | 4 ++++ |
| 47 | + tools/testing/selftests/bpf/progs/pyperf600.c | 11 +++++++---- |
| 48 | + 2 files changed, 11 insertions(+), 4 deletions(-) |
| 49 | + |
| 50 | +diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h |
| 51 | +index 1ed28882daf3..5d3dc4d66d47 100644 |
| 52 | +--- a/tools/testing/selftests/bpf/progs/pyperf.h |
| 53 | ++++ b/tools/testing/selftests/bpf/progs/pyperf.h |
| 54 | +@@ -299,7 +299,11 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx) |
| 55 | + #ifdef NO_UNROLL |
| 56 | + #pragma clang loop unroll(disable) |
| 57 | + #else |
| 58 | ++#ifdef UNROLL_COUNT |
| 59 | ++#pragma clang loop unroll_count(UNROLL_COUNT) |
| 60 | ++#else |
| 61 | + #pragma clang loop unroll(full) |
| 62 | ++#endif |
| 63 | + #endif /* NO_UNROLL */ |
| 64 | + /* Unwind python stack */ |
| 65 | + for (int i = 0; i < STACK_MAX_LEN; ++i) { |
| 66 | +diff --git a/tools/testing/selftests/bpf/progs/pyperf600.c b/tools/testing/selftests/bpf/progs/pyperf600.c |
| 67 | +index cb49b89e37cd..ce1aa5189cc4 100644 |
| 68 | +--- a/tools/testing/selftests/bpf/progs/pyperf600.c |
| 69 | ++++ b/tools/testing/selftests/bpf/progs/pyperf600.c |
| 70 | +@@ -1,9 +1,12 @@ |
| 71 | + // SPDX-License-Identifier: GPL-2.0 |
| 72 | + // Copyright (c) 2019 Facebook |
| 73 | + #define STACK_MAX_LEN 600 |
| 74 | +-/* clang will not unroll the loop 600 times. |
| 75 | +- * Instead it will unroll it to the amount it deemed |
| 76 | +- * appropriate, but the loop will still execute 600 times. |
| 77 | +- * Total program size is around 90k insns |
| 78 | ++/* Full unroll of 600 iterations will have total |
| 79 | ++ * program size close to 298k insns and this may |
| 80 | ++ * cause BPF_JMP insn out of 16-bit integer range. |
| 81 | ++ * So limit the unroll size to 150 so the |
| 82 | ++ * total program size is around 80k insns but |
| 83 | ++ * the loop will still execute 600 times. |
| 84 | + */ |
| 85 | ++#define UNROLL_COUNT 150 |
| 86 | + #include "pyperf.h" |
| 87 | +-- |
| 88 | +2.30.2 |
| 89 | + |
0 commit comments