diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index eaf2d328424883..f4f841dde42cea 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3213,6 +3213,8 @@ struct bpf_sock_ops { __u32 sk_txhash; __u64 bytes_received; __u64 bytes_acked; + __u64 netns_dev; + __u64 netns_ino; }; /* Definitions for bpf_sock_ops_cb_flags */ diff --git a/net/core/filter.c b/net/core/filter.c index 2f88baf39cc200..9c77464b150182 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -75,6 +75,8 @@ #include #include #include +#include +#include /** * sk_filter_trim_cap - run a packet through a socket filter @@ -6810,6 +6812,24 @@ static bool sock_ops_is_valid_access(int off, int size, } } else { switch (off) { + case offsetof(struct bpf_sock_ops, netns_dev) ... + offsetof(struct bpf_sock_ops, netns_dev) + sizeof(u64) - 1: +#ifdef CONFIG_NET_NS + if (off - offsetof(struct bpf_sock_ops, netns_dev) + + size > sizeof(u64)) + return false; +#else + return false; +#endif + break; + case offsetof(struct bpf_sock_ops, netns_ino): +#ifdef CONFIG_NET_NS + if (size != sizeof(u64)) + return false; +#else + return false; +#endif + break; case bpf_ctx_range_till(struct bpf_sock_ops, bytes_received, bytes_acked): if (size != sizeof(__u64)) @@ -7727,6 +7747,11 @@ static u32 sock_addr_convert_ctx_access(enum bpf_access_type type, return insn - insn_buf; } +static struct ns_common *sockops_netns_cb(void *private_data) +{ + return &init_net.ns; +} + static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, @@ -7735,6 +7760,10 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, { struct bpf_insn *insn = insn_buf; int off; + struct inode *ns_inode; + struct path ns_path; + u64 netns_dev; + void *res; /* Helper macro for adding read access to tcp_sock or sock fields. */ #define SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ) \ @@ -7981,6 +8010,71 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, SOCK_OPS_GET_OR_SET_FIELD(sk_txhash, sk_txhash, struct sock, type); break; + + case offsetof(struct bpf_sock_ops, netns_dev) ... + offsetof(struct bpf_sock_ops, netns_dev) + sizeof(u64) - 1: +#ifdef CONFIG_NET_NS + /* We get the netns_dev at BPF-load-time and not at + * BPF-exec-time. We assume that netns_dev is a constant. + */ + res = ns_get_path_cb(&ns_path, sockops_netns_cb, NULL); + if (IS_ERR(res)) { + netns_dev = 0; + } else { + ns_inode = ns_path.dentry->d_inode; + netns_dev = new_encode_dev(ns_inode->i_sb->s_dev); + } + off = si->off; + off -= offsetof(struct bpf_sock_ops, netns_dev); + switch (BPF_LDST_BYTES(si)) { + case sizeof(u64): + *insn++ = BPF_MOV64_IMM(si->dst_reg, netns_dev); + break; + case sizeof(u32): + netns_dev = *(u32 *)(((char *)&netns_dev) + off); + *insn++ = BPF_MOV32_IMM(si->dst_reg, netns_dev); + break; + case sizeof(u16): + netns_dev = *(u16 *)(((char *)&netns_dev) + off); + *insn++ = BPF_MOV32_IMM(si->dst_reg, netns_dev); + break; + case sizeof(u8): + netns_dev = *(u8 *)(((char *)&netns_dev) + off); + *insn++ = BPF_MOV32_IMM(si->dst_reg, netns_dev); + break; + } +#endif + break; + + case offsetof(struct bpf_sock_ops, netns_ino): +#ifdef CONFIG_NET_NS + /* Loading: sk_ops->sk->__sk_common.skc_net.net->ns.inum + * Type: (struct bpf_sock_ops_kern *) + * ->(struct sock *) + * ->(struct sock_common) + * .possible_net_t + * .(struct net *) + * ->(struct ns_common) + * .(unsigned int) + */ + BUILD_BUG_ON(offsetof(struct sock, __sk_common) != 0); + BUILD_BUG_ON(offsetof(possible_net_t, net) != 0); + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( + struct bpf_sock_ops_kern, sk), + si->dst_reg, si->src_reg, + offsetof(struct bpf_sock_ops_kern, sk)); + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( + possible_net_t, net), + si->dst_reg, si->dst_reg, + offsetof(struct sock_common, skc_net)); + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF( + struct ns_common, inum), + si->dst_reg, si->dst_reg, + offsetof(struct net, ns) + + offsetof(struct ns_common, inum)); +#endif + break; + } return insn - insn_buf; } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 704bb69514a203..eb56620a9d7aed 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3206,6 +3206,8 @@ struct bpf_sock_ops { __u32 sk_txhash; __u64 bytes_received; __u64 bytes_acked; + __u64 netns_dev; + __u64 netns_ino; }; /* Definitions for bpf_sock_ops_cb_flags */ diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 3845144e2c917e..5a1b9c96fca133 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -2,6 +2,7 @@ // Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io #include #include +#include #include #include #include @@ -21,6 +22,7 @@ #include #include #include +#include #include #include @@ -63,8 +65,8 @@ int s1, s2, c1, c2, p1, p2; int test_cnt; int passed; int failed; -int map_fd[8]; -struct bpf_map *maps[8]; +int map_fd[9]; +struct bpf_map *maps[9]; int prog_fd[11]; int txmsg_pass; @@ -84,6 +86,7 @@ int txmsg_ingress; int txmsg_skb; int ktls; int peek_flag; +uint64_t netns_opt; static const struct option long_options[] = { {"help", no_argument, NULL, 'h' }, @@ -111,6 +114,7 @@ static const struct option long_options[] = { {"txmsg_skb", no_argument, &txmsg_skb, 1 }, {"ktls", no_argument, &ktls, 1 }, {"peek", no_argument, &peek_flag, 1 }, + {"netns", required_argument, NULL, 'n'}, {0, 0, NULL, 0 } }; @@ -1585,6 +1589,7 @@ char *map_names[] = { "sock_bytes", "sock_redir_flags", "sock_skb_opts", + "sock_netns", }; int prog_attach_type[] = { @@ -1619,6 +1624,8 @@ static int populate_progs(char *bpf_file) struct bpf_object *obj; int i = 0; long err; + struct stat netns_sb; + uint64_t netns_ino; obj = bpf_object__open(bpf_file); err = libbpf_get_error(obj); @@ -1655,6 +1662,28 @@ static int populate_progs(char *bpf_file) } } + if (netns_opt == 0) { + err = stat("/proc/self/ns/net", &netns_sb); + if (err) { + fprintf(stderr, + "ERROR: cannot stat network namespace: %ld (%s)\n", + err, strerror(errno)); + return -1; + } + netns_ino = netns_sb.st_ino; + } else { + netns_ino = netns_opt; + } + i = 1; + err = bpf_map_update_elem(map_fd[8], &netns_ino, &i, BPF_ANY); + if (err) { + fprintf(stderr, + "ERROR: bpf_map_update_elem (netns): %ld (%s)\n", + err, strerror(errno)); + return -1; + } + + return 0; } @@ -1738,7 +1767,7 @@ int main(int argc, char **argv) if (argc < 2) return test_suite(-1); - while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:p:q:", + while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:p:q:n:", long_options, &longindex)) != -1) { switch (opt) { case 's': @@ -1805,6 +1834,9 @@ int main(int argc, char **argv) return -1; } break; + case 'n': + netns_opt = strtoull(optarg, NULL, 10); + break; case 0: break; case 'h': diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.h b/tools/testing/selftests/bpf/test_sockmap_kern.h index e7639f66a9418f..317406dad6cfaf 100644 --- a/tools/testing/selftests/bpf/test_sockmap_kern.h +++ b/tools/testing/selftests/bpf/test_sockmap_kern.h @@ -91,6 +91,13 @@ struct bpf_map_def SEC("maps") sock_skb_opts = { .max_entries = 1 }; +struct bpf_map_def SEC("maps") sock_netns = { + .type = BPF_MAP_TYPE_HASH, + .key_size = sizeof(__u64), + .value_size = sizeof(int), + .max_entries = 16 +}; + SEC("sk_skb1") int bpf_prog1(struct __sk_buff *skb) { @@ -132,9 +139,24 @@ int bpf_sockmap(struct bpf_sock_ops *skops) { __u32 lport, rport; int op, err = 0, index, key, ret; + int i = 0; + __u64 netns_dev, netns_ino; + int *allowed; op = (int) skops->op; + netns_dev = skops->netns_dev; + netns_ino = skops->netns_ino; + bpf_printk("bpf_sockmap: netns_dev = %lu netns_ino = %lu\n", + netns_dev, netns_ino); + + // Only allow sockmap connection on the configured network namespace + allowed = bpf_map_lookup_elem(&sock_netns, &netns_ino); + if (allowed == NULL || *allowed == 0) { + bpf_printk("not binding connection on netns_ino %lu\n", + netns_ino); + return 0; + } switch (op) { case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: diff --git a/tools/testing/selftests/bpf/verifier/var_off.c b/tools/testing/selftests/bpf/verifier/var_off.c index 8504ac93780989..9e4c6c78eb9d5c 100644 --- a/tools/testing/selftests/bpf/verifier/var_off.c +++ b/tools/testing/selftests/bpf/verifier/var_off.c @@ -246,3 +246,56 @@ .result = ACCEPT, .prog_type = BPF_PROG_TYPE_LWT_IN, }, +{ + "sockops accessing bpf_sock_ops->netns_dev, ok", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_1, offsetof(struct bpf_sock_ops, + netns_dev)), + + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, offsetof(struct bpf_sock_ops, + netns_dev)), + BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, offsetof(struct bpf_sock_ops, + netns_dev) + 4), + + BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_1, offsetof(struct bpf_sock_ops, + netns_dev)), + BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_1, offsetof(struct bpf_sock_ops, + netns_dev) + 2), + BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_1, offsetof(struct bpf_sock_ops, + netns_dev) + 4), + BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_1, offsetof(struct bpf_sock_ops, + netns_dev) + 6), + + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_1, offsetof(struct bpf_sock_ops, + netns_dev)), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_1, offsetof(struct bpf_sock_ops, + netns_dev) + 1), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_1, offsetof(struct bpf_sock_ops, + netns_dev) + 2), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_1, offsetof(struct bpf_sock_ops, + netns_dev) + 3), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_1, offsetof(struct bpf_sock_ops, + netns_dev) + 4), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_1, offsetof(struct bpf_sock_ops, + netns_dev) + 5), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_1, offsetof(struct bpf_sock_ops, + netns_dev) + 6), + BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_1, offsetof(struct bpf_sock_ops, + netns_dev) + 7), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SOCK_OPS, +}, +{ + "sockops accessing bpf_sock_ops->netns_ino, ok", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_1, offsetof(struct bpf_sock_ops, + netns_ino)), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_SOCK_OPS, +},