Skip to content

Commit c272e25

Browse files
author
Alexei Starovoitov
committed
Merge branch 'bpf: refine kernel.unprivileged_bpf_disabled behaviour'
Alan Maguire says: ==================== Unprivileged BPF disabled (kernel.unprivileged_bpf_disabled >= 1) is the default in most cases now; when set, the BPF system call is blocked for users without CAP_BPF/CAP_SYS_ADMIN. In some cases however, it makes sense to split activities between capability-requiring ones - such as program load/attach - and those that might not require capabilities such as reading perf/ringbuf events, reading or updating BPF map configuration etc. One example of this sort of approach is a service that loads a BPF program, and a user-space program that interacts with it. Here - rather than blocking all BPF syscall commands - unprivileged BPF disabled blocks the key object-creating commands (prog load, map load). Discussion has alluded to this idea in the past [1], and Alexei mentioned it was also discussed at LSF/MM/BPF this year. Changes since v3 [2]: - added acks to patch 1 - CI was failing on Ubuntu; I suspect the issue was an old capability.h file which specified CAP_LAST_CAP as < CAP_BPF, leading to the logic disabling all caps not disabling CAP_BPF. 
Use CAP_BPF as basis for "all caps" bitmap instead as we explicitly define it in cap_helpers.h if not already found in capabilities.h - made global variables arguments to subtests instead (Andrii, patch 2) Changes since v2 [3]: - added acks from Yonghong - clang compilation issue in selftest with bpf_prog_query() (Alexei, patch 2) - disable all capabilities for test (Yonghong, patch 2) - add assertions that size of perf/ringbuf data matches expectations (Yonghong, patch 2) - add map array size definition, remove unneeded whitespace (Yonghong, patch 2) Changes since RFC [4]: - widened scope of commands unprivileged BPF disabled allows (Alexei, patch 1) - removed restrictions on map types for lookup, update, delete (Alexei, patch 1) - removed kernel CONFIG parameter controlling unprivileged bpf disabled change (Alexei, patch 1) - widened test scope to cover most BPF syscall commands, with positive and negative subtests [1] https://lore.kernel.org/bpf/CAADnVQLTBhCTAx1a_nev7CgMZxv1Bb7ecz1AFRin8tHmjPREJA@mail.gmail.com/ [2] https://lore.kernel.org/bpf/[email protected]/T/ [3] https://lore.kernel.org/bpf/[email protected]/T/#t [4] https://lore.kernel.org/bpf/20220511163604.5kuczj6jx3ec5qv6@MBP-98dd607d3435.dhcp.thefacebook.com/T/#mae65f35a193279e718f37686da636094d69b96ee ==================== Signed-off-by: Alexei Starovoitov <[email protected]>
2 parents 9794976 + 90a039f commit c272e25

File tree

3 files changed

+408
-1
lines changed

3 files changed

+408
-1
lines changed

kernel/bpf/syscall.c

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4863,9 +4863,21 @@ static int bpf_prog_bind_map(union bpf_attr *attr)
48634863
static int __sys_bpf(int cmd, bpfptr_t uattr, unsigned int size)
48644864
{
48654865
union bpf_attr attr;
4866+
bool capable;
48664867
int err;
48674868

4868-
if (sysctl_unprivileged_bpf_disabled && !bpf_capable())
4869+
capable = bpf_capable() || !sysctl_unprivileged_bpf_disabled;
4870+
4871+
/* Intent here is for unprivileged_bpf_disabled to block key object
4872+
* creation commands for unprivileged users; other actions depend
4873+
* on fd availability and access to bpffs, so are dependent on
4874+
* object creation success. Capabilities are later verified for
4875+
* operations such as load and map create, so even with unprivileged
4876+
* BPF disabled, capability checks are still carried out for these
4877+
* and other operations.
4878+
*/
4879+
if (!capable &&
4880+
(cmd == BPF_MAP_CREATE || cmd == BPF_PROG_LOAD))
48694881
return -EPERM;
48704882

48714883
err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
Lines changed: 312 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,312 @@
1+
// SPDX-License-Identifier: GPL-2.0
2+
/* Copyright (c) 2022, Oracle and/or its affiliates. */
3+
4+
#include <test_progs.h>
5+
#include <bpf/btf.h>
6+
7+
#include "test_unpriv_bpf_disabled.skel.h"
8+
9+
#include "cap_helpers.h"
10+
11+
/* Using CAP_LAST_CAP is risky here, since it can get pulled in from
12+
* an old /usr/include/linux/capability.h and be < CAP_BPF; as a result
13+
* CAP_BPF would not be included in ALL_CAPS. Instead use CAP_BPF as
14+
* we know its value is correct since it is explicitly defined in
15+
* cap_helpers.h.
16+
*/
17+
#define ALL_CAPS ((2ULL << CAP_BPF) - 1)
18+
19+
#define PINPATH "/sys/fs/bpf/unpriv_bpf_disabled_"
20+
#define NUM_MAPS 7
21+
22+
static __u32 got_perfbuf_val;
23+
static __u32 got_ringbuf_val;
24+
25+
static int process_ringbuf(void *ctx, void *data, size_t len)
26+
{
27+
if (ASSERT_EQ(len, sizeof(__u32), "ringbuf_size_valid"))
28+
got_ringbuf_val = *(__u32 *)data;
29+
return 0;
30+
}
31+
32+
/* Perf buffer sample callback.
 *
 * Records the sample in got_perfbuf_val, but only when the sample is exactly
 * one __u32 in size.
 */
static void process_perfbuf(void *ctx, int cpu, void *data, __u32 len)
{
	const __u32 *sample = data;

	/* a sample of unexpected size is reported and otherwise ignored */
	if (!ASSERT_EQ(len, sizeof(__u32), "perfbuf_size_valid"))
		return;

	got_perfbuf_val = *sample;
}
37+
38+
/* Read and/or update the value stored in a sysctl file.
 *
 * @sysctl_path: path of the file; it is opened in "r+" mode.
 * @old_val: if non-NULL, receives the first whitespace-delimited token found
 *           in the file. The read is limited to 31 characters plus the
 *           terminating NUL, matching the 32-byte buffers the callers in
 *           this file pass.
 * @new_val: value to write at offset 0. The write is skipped when @old_val
 *           was read and already equals @new_val.
 *
 * Returns 0 on success, or a negative errno value: the fopen() error,
 * -ENOENT when @old_val was requested but no value could be read, or the
 * error reported while seeking, writing, flushing or closing the file.
 */
static int sysctl_set(const char *sysctl_path, char *old_val, const char *new_val)
{
	int ret = 0;
	FILE *fp;

	fp = fopen(sysctl_path, "r+");
	if (!fp)
		return -errno;

	if (old_val && fscanf(fp, "%31s", old_val) <= 0) {
		ret = -ENOENT;
	} else if (!old_val || strcmp(old_val, new_val) != 0) {
		/* An update stream needs a positioning call between a read
		 * and a write. The stream is buffered, so fprintf() alone
		 * only fills the buffer; fflush() forces the write so that a
		 * rejected value (e.g. -EPERM) is reported to the caller
		 * instead of being silently lost at fclose() time.
		 */
		if (fseek(fp, 0, SEEK_SET) != 0 ||
		    fprintf(fp, "%s", new_val) < 0 ||
		    fflush(fp) != 0)
			ret = -errno;
	}

	/* keep the first error; otherwise report a failure to close */
	if (fclose(fp) != 0 && !ret)
		ret = -errno;

	return ret;
}
57+
58+
static void test_unpriv_bpf_disabled_positive(struct test_unpriv_bpf_disabled *skel,
59+
__u32 prog_id, int prog_fd, int perf_fd,
60+
char **map_paths, int *map_fds)
61+
{
62+
struct perf_buffer *perfbuf = NULL;
63+
struct ring_buffer *ringbuf = NULL;
64+
int i, nr_cpus, link_fd = -1;
65+
66+
nr_cpus = bpf_num_possible_cpus();
67+
68+
skel->bss->perfbuf_val = 1;
69+
skel->bss->ringbuf_val = 2;
70+
71+
/* Positive tests for unprivileged BPF disabled. Verify we can
72+
* - retrieve and interact with pinned maps;
73+
* - set up and interact with perf buffer;
74+
* - set up and interact with ring buffer;
75+
* - create a link
76+
*/
77+
perfbuf = perf_buffer__new(bpf_map__fd(skel->maps.perfbuf), 8, process_perfbuf, NULL, NULL,
78+
NULL);
79+
if (!ASSERT_OK_PTR(perfbuf, "perf_buffer__new"))
80+
goto cleanup;
81+
82+
ringbuf = ring_buffer__new(bpf_map__fd(skel->maps.ringbuf), process_ringbuf, NULL, NULL);
83+
if (!ASSERT_OK_PTR(ringbuf, "ring_buffer__new"))
84+
goto cleanup;
85+
86+
/* trigger & validate perf event, ringbuf output */
87+
usleep(1);
88+
89+
ASSERT_GT(perf_buffer__poll(perfbuf, 100), -1, "perf_buffer__poll");
90+
ASSERT_EQ(got_perfbuf_val, skel->bss->perfbuf_val, "check_perfbuf_val");
91+
ASSERT_EQ(ring_buffer__consume(ringbuf), 1, "ring_buffer__consume");
92+
ASSERT_EQ(got_ringbuf_val, skel->bss->ringbuf_val, "check_ringbuf_val");
93+
94+
for (i = 0; i < NUM_MAPS; i++) {
95+
map_fds[i] = bpf_obj_get(map_paths[i]);
96+
if (!ASSERT_GT(map_fds[i], -1, "obj_get"))
97+
goto cleanup;
98+
}
99+
100+
for (i = 0; i < NUM_MAPS; i++) {
101+
bool prog_array = strstr(map_paths[i], "prog_array") != NULL;
102+
bool array = strstr(map_paths[i], "array") != NULL;
103+
bool buf = strstr(map_paths[i], "buf") != NULL;
104+
__u32 key = 0, vals[nr_cpus], lookup_vals[nr_cpus];
105+
__u32 expected_val = 1;
106+
int j;
107+
108+
/* skip ringbuf, perfbuf */
109+
if (buf)
110+
continue;
111+
112+
for (j = 0; j < nr_cpus; j++)
113+
vals[j] = expected_val;
114+
115+
if (prog_array) {
116+
/* need valid prog array value */
117+
vals[0] = prog_fd;
118+
/* prog array lookup returns prog id, not fd */
119+
expected_val = prog_id;
120+
}
121+
ASSERT_OK(bpf_map_update_elem(map_fds[i], &key, vals, 0), "map_update_elem");
122+
ASSERT_OK(bpf_map_lookup_elem(map_fds[i], &key, &lookup_vals), "map_lookup_elem");
123+
ASSERT_EQ(lookup_vals[0], expected_val, "map_lookup_elem_values");
124+
if (!array)
125+
ASSERT_OK(bpf_map_delete_elem(map_fds[i], &key), "map_delete_elem");
126+
}
127+
128+
link_fd = bpf_link_create(bpf_program__fd(skel->progs.handle_perf_event), perf_fd,
129+
BPF_PERF_EVENT, NULL);
130+
ASSERT_GT(link_fd, 0, "link_create");
131+
132+
cleanup:
133+
if (link_fd)
134+
close(link_fd);
135+
if (perfbuf)
136+
perf_buffer__free(perfbuf);
137+
if (ringbuf)
138+
ring_buffer__free(ringbuf);
139+
}
140+
141+
static void test_unpriv_bpf_disabled_negative(struct test_unpriv_bpf_disabled *skel,
142+
__u32 prog_id, int prog_fd, int perf_fd,
143+
char **map_paths, int *map_fds)
144+
{
145+
const struct bpf_insn prog_insns[] = {
146+
BPF_MOV64_IMM(BPF_REG_0, 0),
147+
BPF_EXIT_INSN(),
148+
};
149+
const size_t prog_insn_cnt = sizeof(prog_insns) / sizeof(struct bpf_insn);
150+
LIBBPF_OPTS(bpf_prog_load_opts, load_opts);
151+
struct bpf_map_info map_info = {};
152+
__u32 map_info_len = sizeof(map_info);
153+
struct bpf_link_info link_info = {};
154+
__u32 link_info_len = sizeof(link_info);
155+
struct btf *btf = NULL;
156+
__u32 attach_flags = 0;
157+
__u32 prog_ids[3] = {};
158+
__u32 prog_cnt = 3;
159+
__u32 next;
160+
int i;
161+
162+
/* Negative tests for unprivileged BPF disabled. Verify we cannot
163+
* - load BPF programs;
164+
* - create BPF maps;
165+
* - get a prog/map/link fd by id;
166+
* - get next prog/map/link id
167+
* - query prog
168+
* - BTF load
169+
*/
170+
ASSERT_EQ(bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "simple_prog", "GPL",
171+
prog_insns, prog_insn_cnt, &load_opts),
172+
-EPERM, "prog_load_fails");
173+
174+
for (i = BPF_MAP_TYPE_HASH; i <= BPF_MAP_TYPE_BLOOM_FILTER; i++)
175+
ASSERT_EQ(bpf_map_create(i, NULL, sizeof(int), sizeof(int), 1, NULL),
176+
-EPERM, "map_create_fails");
177+
178+
ASSERT_EQ(bpf_prog_get_fd_by_id(prog_id), -EPERM, "prog_get_fd_by_id_fails");
179+
ASSERT_EQ(bpf_prog_get_next_id(prog_id, &next), -EPERM, "prog_get_next_id_fails");
180+
ASSERT_EQ(bpf_prog_get_next_id(0, &next), -EPERM, "prog_get_next_id_fails");
181+
182+
if (ASSERT_OK(bpf_obj_get_info_by_fd(map_fds[0], &map_info, &map_info_len),
183+
"obj_get_info_by_fd")) {
184+
ASSERT_EQ(bpf_map_get_fd_by_id(map_info.id), -EPERM, "map_get_fd_by_id_fails");
185+
ASSERT_EQ(bpf_map_get_next_id(map_info.id, &next), -EPERM,
186+
"map_get_next_id_fails");
187+
}
188+
ASSERT_EQ(bpf_map_get_next_id(0, &next), -EPERM, "map_get_next_id_fails");
189+
190+
if (ASSERT_OK(bpf_obj_get_info_by_fd(bpf_link__fd(skel->links.sys_nanosleep_enter),
191+
&link_info, &link_info_len),
192+
"obj_get_info_by_fd")) {
193+
ASSERT_EQ(bpf_link_get_fd_by_id(link_info.id), -EPERM, "link_get_fd_by_id_fails");
194+
ASSERT_EQ(bpf_link_get_next_id(link_info.id, &next), -EPERM,
195+
"link_get_next_id_fails");
196+
}
197+
ASSERT_EQ(bpf_link_get_next_id(0, &next), -EPERM, "link_get_next_id_fails");
198+
199+
ASSERT_EQ(bpf_prog_query(prog_fd, BPF_TRACE_FENTRY, 0, &attach_flags, prog_ids,
200+
&prog_cnt), -EPERM, "prog_query_fails");
201+
202+
btf = btf__new_empty();
203+
if (ASSERT_OK_PTR(btf, "empty_btf") &&
204+
ASSERT_GT(btf__add_int(btf, "int", 4, 0), 0, "unpriv_int_type")) {
205+
const void *raw_btf_data;
206+
__u32 raw_btf_size;
207+
208+
raw_btf_data = btf__raw_data(btf, &raw_btf_size);
209+
if (ASSERT_OK_PTR(raw_btf_data, "raw_btf_data_good"))
210+
ASSERT_EQ(bpf_btf_load(raw_btf_data, raw_btf_size, NULL), -EPERM,
211+
"bpf_btf_load_fails");
212+
}
213+
btf__free(btf);
214+
}
215+
216+
void test_unpriv_bpf_disabled(void)
217+
{
218+
char *map_paths[NUM_MAPS] = { PINPATH "array",
219+
PINPATH "percpu_array",
220+
PINPATH "hash",
221+
PINPATH "percpu_hash",
222+
PINPATH "perfbuf",
223+
PINPATH "ringbuf",
224+
PINPATH "prog_array" };
225+
int map_fds[NUM_MAPS];
226+
struct test_unpriv_bpf_disabled *skel;
227+
char unprivileged_bpf_disabled_orig[32] = {};
228+
char perf_event_paranoid_orig[32] = {};
229+
struct bpf_prog_info prog_info = {};
230+
__u32 prog_info_len = sizeof(prog_info);
231+
struct perf_event_attr attr = {};
232+
int prog_fd, perf_fd = -1, i, ret;
233+
__u64 save_caps = 0;
234+
__u32 prog_id;
235+
236+
skel = test_unpriv_bpf_disabled__open_and_load();
237+
if (!ASSERT_OK_PTR(skel, "skel_open"))
238+
return;
239+
240+
skel->bss->test_pid = getpid();
241+
242+
map_fds[0] = bpf_map__fd(skel->maps.array);
243+
map_fds[1] = bpf_map__fd(skel->maps.percpu_array);
244+
map_fds[2] = bpf_map__fd(skel->maps.hash);
245+
map_fds[3] = bpf_map__fd(skel->maps.percpu_hash);
246+
map_fds[4] = bpf_map__fd(skel->maps.perfbuf);
247+
map_fds[5] = bpf_map__fd(skel->maps.ringbuf);
248+
map_fds[6] = bpf_map__fd(skel->maps.prog_array);
249+
250+
for (i = 0; i < NUM_MAPS; i++)
251+
ASSERT_OK(bpf_obj_pin(map_fds[i], map_paths[i]), "pin map_fd");
252+
253+
/* allow user without caps to use perf events */
254+
if (!ASSERT_OK(sysctl_set("/proc/sys/kernel/perf_event_paranoid", perf_event_paranoid_orig,
255+
"-1"),
256+
"set_perf_event_paranoid"))
257+
goto cleanup;
258+
/* ensure unprivileged bpf disabled is set */
259+
ret = sysctl_set("/proc/sys/kernel/unprivileged_bpf_disabled",
260+
unprivileged_bpf_disabled_orig, "2");
261+
if (ret == -EPERM) {
262+
/* if unprivileged_bpf_disabled=1, we get -EPERM back; that's okay. */
263+
if (!ASSERT_OK(strcmp(unprivileged_bpf_disabled_orig, "1"),
264+
"unpriviliged_bpf_disabled_on"))
265+
goto cleanup;
266+
} else {
267+
if (!ASSERT_OK(ret, "set unpriviliged_bpf_disabled"))
268+
goto cleanup;
269+
}
270+
271+
prog_fd = bpf_program__fd(skel->progs.sys_nanosleep_enter);
272+
ASSERT_OK(bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_info_len),
273+
"obj_get_info_by_fd");
274+
prog_id = prog_info.id;
275+
ASSERT_GT(prog_id, 0, "valid_prog_id");
276+
277+
attr.size = sizeof(attr);
278+
attr.type = PERF_TYPE_SOFTWARE;
279+
attr.config = PERF_COUNT_SW_CPU_CLOCK;
280+
attr.freq = 1;
281+
attr.sample_freq = 1000;
282+
perf_fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
283+
if (!ASSERT_GE(perf_fd, 0, "perf_fd"))
284+
goto cleanup;
285+
286+
if (!ASSERT_OK(test_unpriv_bpf_disabled__attach(skel), "skel_attach"))
287+
goto cleanup;
288+
289+
if (!ASSERT_OK(cap_disable_effective(ALL_CAPS, &save_caps), "disable caps"))
290+
goto cleanup;
291+
292+
if (test__start_subtest("unpriv_bpf_disabled_positive"))
293+
test_unpriv_bpf_disabled_positive(skel, prog_id, prog_fd, perf_fd, map_paths,
294+
map_fds);
295+
296+
if (test__start_subtest("unpriv_bpf_disabled_negative"))
297+
test_unpriv_bpf_disabled_negative(skel, prog_id, prog_fd, perf_fd, map_paths,
298+
map_fds);
299+
300+
cleanup:
301+
close(perf_fd);
302+
if (save_caps)
303+
cap_enable_effective(save_caps, NULL);
304+
if (strlen(perf_event_paranoid_orig) > 0)
305+
sysctl_set("/proc/sys/kernel/perf_event_paranoid", NULL, perf_event_paranoid_orig);
306+
if (strlen(unprivileged_bpf_disabled_orig) > 0)
307+
sysctl_set("/proc/sys/kernel/unprivileged_bpf_disabled", NULL,
308+
unprivileged_bpf_disabled_orig);
309+
for (i = 0; i < NUM_MAPS; i++)
310+
unlink(map_paths[i]);
311+
test_unpriv_bpf_disabled__destroy(skel);
312+
}

0 commit comments

Comments
 (0)