
Commit de45fd5

q2ven authored and Kernel Patches Daemon committed
selftest: bpf: Add test for SK_MEMCG_EXCLUSIVE.
The test does the following for IPv4/IPv6 x TCP/UDP sockets with/without
SK_MEMCG_EXCLUSIVE, which can be turned on by net.core.memcg_exclusive or
bpf_setsockopt(SK_BPF_MEMCG_EXCLUSIVE):

  1. Create socket pairs
  2. Send a bunch of data that requires more than 1024 pages
  3. Read memory_allocated from sk->sk_prot->memory_allocated and
     sk->sk_prot->memory_per_cpu_fw_alloc
  4. Check whether the unread data is charged to memory_allocated

If SK_MEMCG_EXCLUSIVE is set, memory_allocated should not change, but we
allow a small error (up to 10 pages) in case other processes on the host
use some TCP/UDP memory.

The amount of allocated pages is buffered in the per-cpu variables
{tcp,udp}_memory_per_cpu_fw_alloc, up to +/- net.core.mem_pcpu_rsv, before
being reported to {tcp,udp}_memory_allocated.

At 3., memory_allocated is calculated from the two variables twice, at
fentry and fexit of the socket create function, to check whether the
per-cpu value was drained during the calculation.  In that case, 3. is
retried.

We use kern_sync_rcu() for UDP because the UDP recv queue is destroyed
after an RCU grace period.

The test takes ~2s on QEMU (64 CPUs) w/ KVM but 6s w/o KVM.

  # time ./test_progs -t sk_memcg
  #370/1   sk_memcg/TCP :OK
  #370/2   sk_memcg/UDP :OK
  #370/3   sk_memcg/TCPv6:OK
  #370/4   sk_memcg/UDPv6:OK
  #370     sk_memcg:OK
  Summary: 1/4 PASSED, 0 SKIPPED, 0 FAILED

  real	0m1.623s
  user	0m0.165s
  sys	0m0.366s

Signed-off-by: Kuniyuki Iwashima <[email protected]>
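Editor's note: the BPF program that turns on the exclusive flag is the second file of this commit and is not shown in the diff below. As orientation only, here is a minimal sketch of what the sock_create program attached via bpf_program__attach_cgroup() in the test might look like, assuming the option is set with bpf_setsockopt() at the SOL_SOCKET level as the message above suggests; the level, headers, and constant definitions are assumptions, not taken from this commit.

// Sketch, not the commit's actual BPF object.
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>

char _license[] SEC("license") = "GPL";

SEC("cgroup/sock_create")
int sock_create(struct bpf_sock *ctx)
{
	int one = 1;

	/* SK_BPF_MEMCG_EXCLUSIVE is introduced elsewhere in this series;
	 * SOL_SOCKET is an assumption here.
	 */
	bpf_setsockopt(ctx, SOL_SOCKET, SK_BPF_MEMCG_EXCLUSIVE, &one, sizeof(one));

	return 1;	/* allow the socket to be created */
}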
1 parent 65230eb commit de45fd5

File tree

2 files changed, +407 -0 lines changed
Lines changed: 261 additions & 0 deletions
@@ -0,0 +1,261 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright 2025 Google LLC */

#include <test_progs.h>
#include "sk_memcg.skel.h"
#include "network_helpers.h"

#define NR_SOCKETS 64
#define NR_SEND 128
#define BUF_SINGLE 1024
#define BUF_TOTAL (BUF_SINGLE * NR_SEND)

struct test_case {
	char name[8];
	int family;
	int type;
	int (*create_sockets)(struct test_case *test_case, int sk[], int len);
	long (*get_memory_allocated)(struct test_case *test_case, struct sk_memcg *skel);
};

static int tcp_create_sockets(struct test_case *test_case, int sk[], int len)
{
	int server, i;

	server = start_server(test_case->family, test_case->type, NULL, 0, 0);
	ASSERT_GE(server, 0, "start_server_str");

	for (i = 0; i < len / 2; i++) {
		sk[i * 2] = connect_to_fd(server, 0);
		if (!ASSERT_GE(sk[i * 2], 0, "connect_to_fd"))
			return sk[i * 2];

		sk[i * 2 + 1] = accept(server, NULL, NULL);
		if (!ASSERT_GE(sk[i * 2 + 1], 0, "accept"))
			return sk[i * 2 + 1];
	}

	close(server);

	return 0;
}

static int udp_create_sockets(struct test_case *test_case, int sk[], int len)
{
	int i, err, rcvbuf = BUF_TOTAL;

	for (i = 0; i < len / 2; i++) {
		sk[i * 2] = start_server(test_case->family, test_case->type, NULL, 0, 0);
		if (!ASSERT_GE(sk[i * 2], 0, "start_server"))
			return sk[i * 2];

		sk[i * 2 + 1] = connect_to_fd(sk[i * 2], 0);
		if (!ASSERT_GE(sk[i * 2 + 1], 0, "connect_to_fd"))
			return sk[i * 2 + 1];

		err = connect_fd_to_fd(sk[i * 2], sk[i * 2 + 1], 0);
		if (!ASSERT_EQ(err, 0, "connect_fd_to_fd"))
			return err;

		err = setsockopt(sk[i * 2], SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(int));
		if (!ASSERT_EQ(err, 0, "setsockopt(SO_RCVBUF)"))
			return err;

		err = setsockopt(sk[i * 2 + 1], SOL_SOCKET, SO_RCVBUF, &rcvbuf, sizeof(int));
		if (!ASSERT_EQ(err, 0, "setsockopt(SO_RCVBUF)"))
			return err;
	}

	return 0;
}

static long get_memory_allocated(struct test_case *test_case,
				 bool *activated, bool *stable,
				 long *memory_allocated)
{
	*stable = false;

	do {
		*activated = true;

		/* AF_INET and AF_INET6 share the same memory_allocated.
		 * tcp_init_sock() is called by AF_INET and AF_INET6,
		 * but udp_lib_init_sock() is inline.
		 */
		socket(AF_INET, test_case->type, 0);
	} while (!*stable);

	return *memory_allocated;
}

static long tcp_get_memory_allocated(struct test_case *test_case, struct sk_memcg *skel)
{
	return get_memory_allocated(test_case,
				    &skel->bss->tcp_activated,
				    &skel->bss->tcp_stable,
				    &skel->bss->tcp_memory_allocated);
}

static long udp_get_memory_allocated(struct test_case *test_case, struct sk_memcg *skel)
{
	return get_memory_allocated(test_case,
				    &skel->bss->udp_activated,
				    &skel->bss->udp_stable,
				    &skel->bss->udp_memory_allocated);
}

static int check_exclusive(struct test_case *test_case,
			   struct sk_memcg *skel, bool exclusive)
{
	char buf[BUF_SINGLE] = {};
	long memory_allocated[2];
	int sk[NR_SOCKETS] = {};
	int err, i, j;

	err = test_case->create_sockets(test_case, sk, ARRAY_SIZE(sk));
	if (err)
		goto close;

	memory_allocated[0] = test_case->get_memory_allocated(test_case, skel);

	/* allocate pages >= 1024 */
	for (i = 0; i < ARRAY_SIZE(sk); i++) {
		for (j = 0; j < NR_SEND; j++) {
			int bytes = send(sk[i], buf, sizeof(buf), 0);

			/* Avoid too noisy logs when something fails. */
			if (bytes != sizeof(buf)) {
				ASSERT_EQ(bytes, sizeof(buf), "send");
				if (bytes < 0) {
					err = bytes;
					goto close;
				}
			}
		}
	}

	memory_allocated[1] = test_case->get_memory_allocated(test_case, skel);

	if (exclusive)
		ASSERT_LE(memory_allocated[1], memory_allocated[0] + 10, "exclusive");
	else
		ASSERT_GT(memory_allocated[1], memory_allocated[0] + 1024, "not exclusive");

close:
	for (i = 0; i < ARRAY_SIZE(sk); i++)
		close(sk[i]);

	if (test_case->type == SOCK_DGRAM) {
		/* UDP recv queue is destroyed after RCU grace period.
		 * With one kern_sync_rcu(), memory_allocated[0] of the
		 * isolated case often matches with memory_allocated[1]
		 * of the preceding non-exclusive case.
		 */
		kern_sync_rcu();
		kern_sync_rcu();
	}

	return err;
}

void run_test(struct test_case *test_case)
{
	struct nstoken *nstoken;
	struct sk_memcg *skel;
	int cgroup, err;

	skel = sk_memcg__open_and_load();
	if (!ASSERT_OK_PTR(skel, "open_and_load"))
		return;

	skel->bss->nr_cpus = libbpf_num_possible_cpus();

	err = sk_memcg__attach(skel);
	if (!ASSERT_OK(err, "attach"))
		goto destroy_skel;

	cgroup = test__join_cgroup("/sk_memcg");
	if (!ASSERT_GE(cgroup, 0, "join_cgroup"))
		goto destroy_skel;

	err = make_netns("sk_memcg");
	if (!ASSERT_EQ(err, 0, "make_netns"))
		goto close_cgroup;

	nstoken = open_netns("sk_memcg");
	if (!ASSERT_OK_PTR(nstoken, "open_netns"))
		goto remove_netns;

	err = check_exclusive(test_case, skel, false);
	if (!ASSERT_EQ(err, 0, "test_exclusive(false)"))
		goto close_netns;

	err = write_sysctl("/proc/sys/net/core/memcg_exclusive", "1");
	if (!ASSERT_EQ(err, 0, "write_sysctl(1)"))
		goto close_netns;

	err = check_exclusive(test_case, skel, true);
	if (!ASSERT_EQ(err, 0, "test_exclusive(true by sysctl)"))
		goto close_netns;

	err = write_sysctl("/proc/sys/net/core/memcg_exclusive", "0");
	if (!ASSERT_EQ(err, 0, "write_sysctl(0)"))
		goto close_netns;

	skel->links.sock_create = bpf_program__attach_cgroup(skel->progs.sock_create, cgroup);
	if (!ASSERT_OK_PTR(skel->links.sock_create, "attach_cgroup(sock_create)"))
		goto close_netns;

	err = check_exclusive(test_case, skel, true);
	ASSERT_EQ(err, 0, "test_exclusive(true by bpf)");

close_netns:
	close_netns(nstoken);
remove_netns:
	remove_netns("sk_memcg");
close_cgroup:
	close(cgroup);
destroy_skel:
	sk_memcg__destroy(skel);
}

struct test_case test_cases[] = {
	{
		.name = "TCP ",
		.family = AF_INET,
		.type = SOCK_STREAM,
		.create_sockets = tcp_create_sockets,
		.get_memory_allocated = tcp_get_memory_allocated,
	},
	{
		.name = "UDP ",
		.family = AF_INET,
		.type = SOCK_DGRAM,
		.create_sockets = udp_create_sockets,
		.get_memory_allocated = udp_get_memory_allocated,
	},
	{
		.name = "TCPv6",
		.family = AF_INET6,
		.type = SOCK_STREAM,
		.create_sockets = tcp_create_sockets,
		.get_memory_allocated = tcp_get_memory_allocated,
	},
	{
		.name = "UDPv6",
		.family = AF_INET6,
		.type = SOCK_DGRAM,
		.create_sockets = udp_create_sockets,
		.get_memory_allocated = udp_get_memory_allocated,
	},
};

void serial_test_sk_memcg(void)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
		if (test__start_subtest(test_cases[i].name))
			run_test(&test_cases[i]);
	}
}
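Editor's note: the get_memory_allocated() handshake above spins on skel->bss (tcp_activated/tcp_stable/tcp_memory_allocated and the udp_* counterparts), which the BPF side of this commit fills in at fentry/fexit of a socket-create function; that file is not part of this excerpt. Below is a rough sketch of the TCP half, assuming the attach point is tcp_init_sock() (hinted at by the comment in get_memory_allocated()) and that the per-cpu forward-alloc buffer is summed through a tcp_memory_per_cpu_fw_alloc ksym; the commit's actual program may differ.

// Sketch of the kernel-side counterpart; hook and ksym choices are assumptions.
#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>

char _license[] SEC("license") = "GPL";

extern int tcp_memory_per_cpu_fw_alloc __ksym;	/* per-cpu forward-alloc buffer */

int nr_cpus;					/* written by the userspace test */
bool tcp_activated, tcp_stable;
long tcp_memory_allocated;
static long snapshot;

static long sum_memory_allocated(struct sock *sk)
{
	long sum = sk->sk_prot->memory_allocated->counter;
	int i;

	/* Fold the per-cpu buffers into the shared counter, as the commit
	 * message describes for step 3.
	 */
	for (i = 0; i < nr_cpus && i < 1024; i++) {
		int *fw_alloc = bpf_per_cpu_ptr(&tcp_memory_per_cpu_fw_alloc, i);

		if (fw_alloc)
			sum += *fw_alloc;
	}

	return sum;
}

SEC("fentry/tcp_init_sock")
int BPF_PROG(tcp_entry, struct sock *sk)
{
	if (tcp_activated)
		snapshot = sum_memory_allocated(sk);

	return 0;
}

SEC("fexit/tcp_init_sock")
int BPF_PROG(tcp_exit, struct sock *sk)
{
	long now;

	if (!tcp_activated)
		return 0;

	now = sum_memory_allocated(sk);
	if (now == snapshot) {
		/* No per-cpu drain raced with the two reads; publish the
		 * value so the userspace do/while loop can exit.
		 */
		tcp_memory_allocated = now;
		tcp_stable = true;
	}

	return 0;
}

The UDP half would follow the same pattern with the udp_* globals and a UDP socket-init hook.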
