
Commit 8154dd8

Fernando Fernandez Mancera authored and Kernel Patches Daemon committed
xsk: avoid data corruption on cq descriptor number
Since commit 30f241f ("xsk: Fix immature cq descriptor production"), the descriptor number is stored in the skb control block, and xsk_cq_submit_addr_locked() relies on it to put the umem addrs onto the pool's completion queue. The skb control block shouldn't be used for this purpose: after transmit, xsk no longer has control over it and other subsystems could use it. This leads to the following kernel panic due to a NULL pointer dereference.

BUG: kernel NULL pointer dereference, address: 0000000000000000
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not-present page
PGD 0 P4D 0
Oops: Oops: 0000 [#1] SMP NOPTI
CPU: 2 UID: 1 PID: 927 Comm: p4xsk.bin Not tainted 6.16.12+deb14-cloud-amd64 #1 PREEMPT(lazy) Debian 6.16.12-1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.17.0-debian-1.17.0-1 04/01/2014
RIP: 0010:xsk_destruct_skb+0xd0/0x180
[...]
Call Trace:
 <IRQ>
 ? napi_complete_done+0x7a/0x1a0
 ip_rcv_core+0x1bb/0x340
 ip_rcv+0x30/0x1f0
 __netif_receive_skb_one_core+0x85/0xa0
 process_backlog+0x87/0x130
 __napi_poll+0x28/0x180
 net_rx_action+0x339/0x420
 handle_softirqs+0xdc/0x320
 ? handle_edge_irq+0x90/0x1e0
 do_softirq.part.0+0x3b/0x60
 </IRQ>
 <TASK>
 __local_bh_enable_ip+0x60/0x70
 __dev_direct_xmit+0x14e/0x1f0
 __xsk_generic_xmit+0x482/0xb70
 ? __remove_hrtimer+0x41/0xa0
 ? __xsk_generic_xmit+0x51/0xb70
 ? _raw_spin_unlock_irqrestore+0xe/0x40
 xsk_sendmsg+0xda/0x1c0
 __sys_sendto+0x1ee/0x200
 __x64_sys_sendto+0x24/0x30
 do_syscall_64+0x84/0x2f0
 ? __pfx_pollwake+0x10/0x10
 ? __rseq_handle_notify_resume+0xad/0x4c0
 ? restore_fpregs_from_fpstate+0x3c/0x90
 ? switch_fpu_return+0x5b/0xe0
 ? do_syscall_64+0x204/0x2f0
 ? do_syscall_64+0x204/0x2f0
 ? do_syscall_64+0x204/0x2f0
 entry_SYSCALL_64_after_hwframe+0x76/0x7e
 </TASK>
[...]
Kernel panic - not syncing: Fatal exception in interrupt
Kernel Offset: 0x1c000000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff)

Use the skb XDP extension to store the number of cq descriptors along with a list of umem addresses.

Fixes: 30f241f ("xsk: Fix immature cq descriptor production")
Closes: https://lore.kernel.org/netdev/[email protected]/
Signed-off-by: Fernando Fernandez Mancera <[email protected]>
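Note: this commit only touches net/xdp/xsk.c; the SKB_EXT_XDP extension id and struct xdp_skb_ext it relies on are defined elsewhere in the series and are not shown in this diff. For orientation only, here is a minimal sketch of what those companion definitions would roughly look like, inferred from how ext->num_descs and ext->addrs_list are used below; the placement and exact form are assumptions, not the authoritative patch.

/* Sketch only: inferred from this diff, not taken from the companion patch. */

/* Payload carried by the skb extension, replacing the old xsk_addr_head
 * that lived in skb->cb.
 */
struct xdp_skb_ext {
	u32 num_descs;			/* number of cq descriptors for this skb */
	struct list_head addrs_list;	/* umem addrs of the additional descriptors */
};

/* include/linux/skbuff.h (assumed): a new extension id. */
enum skb_ext_id {
	/* ... existing ids ... */
	SKB_EXT_XDP,
	SKB_EXT_NUM,
};

/* net/core/skbuff.c (assumed): size registration so skb_ext_add() can
 * reserve room for the payload.
 */
static const u8 skb_ext_type_len[] = {
	/* ... existing entries ... */
	[SKB_EXT_XDP] = SKB_EXT_CHUNKSIZEOF(struct xdp_skb_ext),
};

Because skb extensions are allocated out of line and referenced from the skb rather than stored in skb->cb, later layers that overwrite the control block (as in the ip_rcv path in the trace above) can no longer corrupt the descriptor bookkeeping.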
1 parent c0a0c26 commit 8154dd8

File tree

1 file changed: +49 −21 lines


net/xdp/xsk.c

@@ -41,15 +41,8 @@ struct xsk_addr_node {
 	struct list_head addr_node;
 };
 
-struct xsk_addr_head {
-	u32 num_descs;
-	struct list_head addrs_list;
-};
-
 static struct kmem_cache *xsk_tx_generic_cache;
 
-#define XSKCB(skb) ((struct xsk_addr_head *)((skb)->cb))
-
 void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool)
 {
 	if (pool->cached_need_wakeup & XDP_WAKEUP_RX)
@@ -562,6 +555,7 @@ static void xsk_cq_submit_addr_locked(struct xsk_buff_pool *pool,
 				      struct sk_buff *skb)
 {
 	struct xsk_addr_node *pos, *tmp;
+	struct xdp_skb_ext *ext;
 	u32 descs_processed = 0;
 	unsigned long flags;
 	u32 idx;
@@ -573,14 +567,16 @@ static void xsk_cq_submit_addr_locked(struct xsk_buff_pool *pool,
 			     (u64)(uintptr_t)skb_shinfo(skb)->destructor_arg);
 	descs_processed++;
 
-	if (unlikely(XSKCB(skb)->num_descs > 1)) {
-		list_for_each_entry_safe(pos, tmp, &XSKCB(skb)->addrs_list, addr_node) {
+	ext = skb_ext_find(skb, SKB_EXT_XDP);
+	if (unlikely(ext)) {
+		list_for_each_entry_safe(pos, tmp, &ext->addrs_list, addr_node) {
 			xskq_prod_write_addr(pool->cq, idx + descs_processed,
 					     pos->addr);
 			descs_processed++;
 			list_del(&pos->addr_node);
 			kmem_cache_free(xsk_tx_generic_cache, pos);
 		}
+		skb_ext_del(skb, SKB_EXT_XDP);
 	}
 	xskq_prod_submit_n(pool->cq, descs_processed);
 	spin_unlock_irqrestore(&pool->cq_lock, flags);
@@ -597,12 +593,19 @@ static void xsk_cq_cancel_locked(struct xsk_buff_pool *pool, u32 n)
 
 static void xsk_inc_num_desc(struct sk_buff *skb)
 {
-	XSKCB(skb)->num_descs++;
+	struct xdp_skb_ext *ext;
+
+	ext = skb_ext_find(skb, SKB_EXT_XDP);
+	if (ext)
+		ext->num_descs++;
 }
 
 static u32 xsk_get_num_desc(struct sk_buff *skb)
 {
-	return XSKCB(skb)->num_descs;
+	struct xdp_skb_ext *ext;
+
+	ext = skb_ext_find(skb, SKB_EXT_XDP);
+	return ext ? ext->num_descs : 1;
 }
 
 static void xsk_destruct_skb(struct sk_buff *skb)
@@ -621,12 +624,9 @@ static void xsk_destruct_skb(struct sk_buff *skb)
 static void xsk_skb_init_misc(struct sk_buff *skb, struct xdp_sock *xs,
 			      u64 addr)
 {
-	BUILD_BUG_ON(sizeof(struct xsk_addr_head) > sizeof(skb->cb));
-	INIT_LIST_HEAD(&XSKCB(skb)->addrs_list);
 	skb->dev = xs->dev;
 	skb->priority = READ_ONCE(xs->sk.sk_priority);
 	skb->mark = READ_ONCE(xs->sk.sk_mark);
-	XSKCB(skb)->num_descs = 0;
 	skb->destructor = xsk_destruct_skb;
 	skb_shinfo(skb)->destructor_arg = (void *)(uintptr_t)addr;
 }
@@ -636,12 +636,15 @@ static void xsk_consume_skb(struct sk_buff *skb)
 	struct xdp_sock *xs = xdp_sk(skb->sk);
 	u32 num_descs = xsk_get_num_desc(skb);
 	struct xsk_addr_node *pos, *tmp;
+	struct xdp_skb_ext *ext;
 
-	if (unlikely(num_descs > 1)) {
-		list_for_each_entry_safe(pos, tmp, &XSKCB(skb)->addrs_list, addr_node) {
+	ext = skb_ext_find(skb, SKB_EXT_XDP);
+	if (unlikely(ext)) {
+		list_for_each_entry_safe(pos, tmp, &ext->addrs_list, addr_node) {
 			list_del(&pos->addr_node);
 			kmem_cache_free(xsk_tx_generic_cache, pos);
 		}
+		skb_ext_del(skb, SKB_EXT_XDP);
 	}
 
 	skb->destructor = sock_wfree;
@@ -727,6 +730,18 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
 			return ERR_PTR(err);
 		}
 	} else {
+		struct xdp_skb_ext *ext;
+
+		ext = skb_ext_find(skb, SKB_EXT_XDP);
+		if (!ext) {
+			ext = skb_ext_add(skb, SKB_EXT_XDP);
+			if (!ext)
+				return ERR_PTR(-ENOMEM);
+			memset(ext, 0, sizeof(*ext));
+			INIT_LIST_HEAD(&ext->addrs_list);
+			ext->num_descs = 1;
+		}
+
 		xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, GFP_KERNEL);
 		if (!xsk_addr)
 			return ERR_PTR(-ENOMEM);
@@ -736,7 +751,8 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
 		 * would be dropped, which implies freeing all list elements
 		 */
 		xsk_addr->addr = desc->addr;
-		list_add_tail(&xsk_addr->addr_node, &XSKCB(skb)->addrs_list);
+		list_add_tail(&xsk_addr->addr_node, &ext->addrs_list);
+		xsk_inc_num_desc(skb);
 	}
 
 	len = desc->len;
@@ -814,6 +830,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
 	} else {
 		int nr_frags = skb_shinfo(skb)->nr_frags;
 		struct xsk_addr_node *xsk_addr;
+		struct xdp_skb_ext *ext;
 		struct page *page;
 		u8 *vaddr;
 
@@ -828,6 +845,19 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
 			goto free_err;
 		}
 
+		ext = skb_ext_find(skb, SKB_EXT_XDP);
+		if (!ext) {
+			ext = skb_ext_add(skb, SKB_EXT_XDP);
+			if (!ext) {
+				__free_page(page);
+				err = -ENOMEM;
+				goto free_err;
+			}
+			memset(ext, 0, sizeof(*ext));
+			INIT_LIST_HEAD(&ext->addrs_list);
+			ext->num_descs = 1;
+		}
+
 		xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, GFP_KERNEL);
 		if (!xsk_addr) {
 			__free_page(page);
@@ -843,12 +873,11 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
 		refcount_add(PAGE_SIZE, &xs->sk.sk_wmem_alloc);
 
 		xsk_addr->addr = desc->addr;
-		list_add_tail(&xsk_addr->addr_node, &XSKCB(skb)->addrs_list);
+		list_add_tail(&xsk_addr->addr_node, &ext->addrs_list);
+		xsk_inc_num_desc(skb);
 		}
 	}
 
-	xsk_inc_num_desc(skb);
-
 	return skb;
 
 free_err:
@@ -857,7 +886,6 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
 
 	if (err == -EOVERFLOW) {
 		/* Drop the packet */
-		xsk_inc_num_desc(xs->skb);
 		xsk_drop_skb(xs->skb);
 		xskq_cons_release(xs->tx);
 	} else {
