
Commit f1d8c65
Alexei Starovoitov committed

Merge branch 'percpu_hash-maps'
Leon Hwang says:

====================

In the discussion thread "[PATCH bpf-next v9 0/7] bpf: Introduce BPF_F_CPU and BPF_F_ALL_CPUS flags for percpu maps"[1], it was pointed out that missing calls to bpf_obj_free_fields() could lead to memory leaks. A selftest was added to confirm that this is indeed a real issue: the refcount of a BPF_KPTR_REF field is not decremented when bpf_obj_free_fields() is missing after copy_map_value[,_long]().

Further inspection of the copy_map_value[,_long]() call sites revealed two locations affected by this issue:

1. pcpu_copy_value()
2. htab_map_update_elem() when used with BPF_F_LOCK

A similar case occurs when updating local storage maps with BPF_F_LOCK.

This series fixes the cases where BPF_F_LOCK is not involved by properly calling bpf_obj_free_fields() after copy_map_value[,_long](), and it adds a selftest to verify the fix. The remaining cases involving BPF_F_LOCK will be addressed in a separate patch set once the series "bpf: Introduce BPF_F_CPU and BPF_F_ALL_CPUS flags for percpu maps" is applied.

Changes:

v5 -> v6:
* Update the test name to include "refcounted_kptr".
* Update some local variables' names in the test (per Alexei).
* v5: https://lore.kernel.org/bpf/[email protected]/

v4 -> v5:
* Use a local variable to store the this_cpu_ptr()/per_cpu_ptr() result, and reuse it between copy_map_value[,_long]() and bpf_obj_free_fields() in patch #1 (per Andrii).
* Drop patches #2 and #3, because the combination of BPF_F_LOCK with other special fields (except for BPF_SPIN_LOCK) will be disallowed on the UAPI side in the future (per Alexei).
* v4: https://lore.kernel.org/bpf/[email protected]/

v3 -> v4:
* Target the bpf-next tree.
* Address comments from Amery:
  * Drop bpf_obj_free_fields() in the path of updating local storage maps without BPF_F_LOCK.
  * Drop the corresponding selftest.
  * Respin the other test of local storage maps using syscall BPF programs.
* v3: https://lore.kernel.org/bpf/[email protected]/

v2 -> v3:
* Free special fields when updating local storage maps without BPF_F_LOCK.
* Add a test to verify that the refcount is decremented when updating cgroup local storage maps without BPF_F_LOCK.
* Address review from AI bot:
  * Slow path with BPF_F_LOCK (around lines 642-646) in 'bpf_local_storage.c'.
* v2: https://lore.kernel.org/bpf/[email protected]/

v1 -> v2:
* Add a test to verify that the refcount is decremented when updating cgroup local storage maps with BPF_F_LOCK.
* Address review from AI bot:
  * Fast path without the bucket lock (around line 610) in 'bpf_local_storage.c'.
* v1: https://lore.kernel.org/bpf/[email protected]/

====================

Link: https://patch.msgid.link/[email protected]
Signed-off-by: Alexei Starovoitov <[email protected]>
Parents: 3249e8a + c1cbf0d
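For context, the pattern being fixed, as a minimal sketch rather than code from this patch (the helper name update_in_place is hypothetical): copy_map_value() copies around the special fields recorded in the map's btf_record, so an in-place percpu update leaves the previously stored BPF_KPTR_REF pointer in the destination, and bpf_obj_free_fields() is what clears that field and drops the reference it held.

	/* Hypothetical helper showing the required copy-then-release pattern;
	 * the real fix lives in pcpu_copy_value() in kernel/bpf/hashtab.c.
	 */
	static void update_in_place(struct bpf_map *map, void *dst, void *src)
	{
		copy_map_value(map, dst, src);          /* special fields in dst untouched */
		bpf_obj_free_fields(map->record, dst);  /* drop stale kptr ref, zero field */
	}

Without the second call, every in-place update of a value holding a referenced kptr leaves one reference behind.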

3 files changed: +124 -2 lines changed

kernel/bpf/hashtab.c

Lines changed: 8 additions & 2 deletions

@@ -934,15 +934,21 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 static void pcpu_copy_value(struct bpf_htab *htab, void __percpu *pptr,
 			    void *value, bool onallcpus)
 {
+	void *ptr;
+
 	if (!onallcpus) {
 		/* copy true value_size bytes */
-		copy_map_value(&htab->map, this_cpu_ptr(pptr), value);
+		ptr = this_cpu_ptr(pptr);
+		copy_map_value(&htab->map, ptr, value);
+		bpf_obj_free_fields(htab->map.record, ptr);
 	} else {
 		u32 size = round_up(htab->map.value_size, 8);
 		int off = 0, cpu;

 		for_each_possible_cpu(cpu) {
-			copy_map_value_long(&htab->map, per_cpu_ptr(pptr, cpu), value + off);
+			ptr = per_cpu_ptr(pptr, cpu);
+			copy_map_value_long(&htab->map, ptr, value + off);
+			bpf_obj_free_fields(htab->map.record, ptr);
 			off += size;
 		}
 	}
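Why the copy alone leaks: copy_map_value[,_long]() is built on a memcpy that steps over every special field in the btf_record. A simplified sketch from memory of the helper in include/linux/bpf.h (the NULL-record fast path and the long variant are omitted):

	static void bpf_obj_memcpy_sketch(struct btf_record *rec,
					  void *dst, void *src, u32 size)
	{
		u32 curr_off = 0;
		int i;

		for (i = 0; i < rec->cnt; i++) {
			u32 next_off = rec->fields[i].offset;

			/* copy the bytes before this special field... */
			memcpy(dst + curr_off, src + curr_off, next_off - curr_off);
			/* ...and step over the field itself */
			curr_off = next_off + rec->fields[i].size;
		}
		memcpy(dst + curr_off, src + curr_off, size - curr_off);
	}

Since the kptr bytes in the destination are never overwritten, the old reference survives the update; the added bpf_obj_free_fields() call is what finally releases it.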

tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c

Lines changed: 56 additions & 0 deletions

@@ -44,3 +44,59 @@ void test_refcounted_kptr_wrong_owner(void)
 	ASSERT_OK(opts.retval, "rbtree_wrong_owner_remove_fail_a2 retval");
 	refcounted_kptr__destroy(skel);
 }
+
+void test_percpu_hash_refcounted_kptr_refcount_leak(void)
+{
+	struct refcounted_kptr *skel;
+	int cpu_nr, fd, err, key = 0;
+	struct bpf_map *map;
+	size_t values_sz;
+	u64 *values;
+	LIBBPF_OPTS(bpf_test_run_opts, opts,
+		    .data_in = &pkt_v4,
+		    .data_size_in = sizeof(pkt_v4),
+		    .repeat = 1,
+	);
+
+	cpu_nr = libbpf_num_possible_cpus();
+	if (!ASSERT_GT(cpu_nr, 0, "libbpf_num_possible_cpus"))
+		return;
+
+	values = calloc(cpu_nr, sizeof(u64));
+	if (!ASSERT_OK_PTR(values, "calloc values"))
+		return;
+
+	skel = refcounted_kptr__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "refcounted_kptr__open_and_load")) {
+		free(values);
+		return;
+	}
+
+	values_sz = cpu_nr * sizeof(u64);
+	memset(values, 0, values_sz);
+
+	map = skel->maps.percpu_hash;
+	err = bpf_map__update_elem(map, &key, sizeof(key), values, values_sz, 0);
+	if (!ASSERT_OK(err, "bpf_map__update_elem"))
+		goto out;
+
+	fd = bpf_program__fd(skel->progs.percpu_hash_refcount_leak);
+	err = bpf_prog_test_run_opts(fd, &opts);
+	if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
+		goto out;
+	if (!ASSERT_EQ(opts.retval, 2, "opts.retval"))
+		goto out;
+
+	err = bpf_map__update_elem(map, &key, sizeof(key), values, values_sz, 0);
+	if (!ASSERT_OK(err, "bpf_map__update_elem"))
+		goto out;
+
+	fd = bpf_program__fd(skel->progs.check_percpu_hash_refcount);
+	err = bpf_prog_test_run_opts(fd, &opts);
+	ASSERT_OK(err, "bpf_prog_test_run_opts");
+	ASSERT_EQ(opts.retval, 1, "opts.retval");
+
+out:
+	refcounted_kptr__destroy(skel);
+	free(values);
+}
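How the asserted retvals come about, as a reading of the test rather than anything stated in the patch:

	/*
	 * retval 2: percpu_hash_refcount_leak runs bpf_obj_new() (refcount 1)
	 *           plus bpf_refcount_acquire() (refcount 2); one reference is
	 *           then owned by the v->node kptr, the other by the list.
	 * retval 1: the second bpf_map__update_elem() overwrites the element;
	 *           with the fix, pcpu_copy_value() drops the v->node reference,
	 *           so check_percpu_hash_refcount reads 1. On an unfixed kernel
	 *           the refcount stays 2 and the final ASSERT_EQ fails.
	 */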

tools/testing/selftests/bpf/progs/refcounted_kptr.c

Lines changed: 60 additions & 0 deletions

@@ -568,4 +568,64 @@ int BPF_PROG(rbtree_sleepable_rcu_no_explicit_rcu_lock,
 	return 0;
 }

+private(kptr_ref) u64 ref;
+
+static int probe_read_refcount(void)
+{
+	u32 refcount;
+
+	bpf_probe_read_kernel(&refcount, sizeof(refcount), (void *) ref);
+	return refcount;
+}
+
+static int __insert_in_list(struct bpf_list_head *head, struct bpf_spin_lock *lock,
+			    struct node_data __kptr **node)
+{
+	struct node_data *node_new, *node_ref, *node_old;
+
+	node_new = bpf_obj_new(typeof(*node_new));
+	if (!node_new)
+		return -1;
+
+	node_ref = bpf_refcount_acquire(node_new);
+	node_old = bpf_kptr_xchg(node, node_new);
+	if (node_old) {
+		bpf_obj_drop(node_old);
+		bpf_obj_drop(node_ref);
+		return -2;
+	}
+
+	bpf_spin_lock(lock);
+	bpf_list_push_front(head, &node_ref->l);
+	ref = (u64)(void *) &node_ref->ref;
+	bpf_spin_unlock(lock);
+	return probe_read_refcount();
+}
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+	__type(key, int);
+	__type(value, struct map_value);
+	__uint(max_entries, 1);
+} percpu_hash SEC(".maps");
+
+SEC("tc")
+int percpu_hash_refcount_leak(void *ctx)
+{
+	struct map_value *v;
+	int key = 0;
+
+	v = bpf_map_lookup_elem(&percpu_hash, &key);
+	if (!v)
+		return 0;
+
+	return __insert_in_list(&head, &lock, &v->node);
+}
+
+SEC("tc")
+int check_percpu_hash_refcount(void *ctx)
+{
+	return probe_read_refcount();
+}
+
 char _license[] SEC("license") = "GPL";
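For reference, the release side the fix relies on: bpf_obj_free_fields() walks the btf_record and, for a referenced kptr, atomically clears the field and drops the reference it held. A rough sketch of just the BPF_KPTR_REF arm (simplified from memory of kernel/bpf/syscall.c; obj_drop() stands in for the real destructor path, and all other field kinds are omitted):

	void bpf_obj_free_fields_sketch(const struct btf_record *rec, void *obj)
	{
		int i;

		if (IS_ERR_OR_NULL(rec))
			return;

		for (i = 0; i < rec->cnt; i++) {
			void *field_ptr = obj + rec->fields[i].offset;
			void *old;

			if (rec->fields[i].type != BPF_KPTR_REF)
				continue;
			/* Clear the kptr slot and release the object it pointed to. */
			old = (void *)xchg((unsigned long *)field_ptr, 0);
			if (old)
				obj_drop(old);	/* stand-in for the real drop path */
		}
	}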
