Commit 334e25f

Fernando Fernandez Mancera authored and Kernel Patches Daemon committed
xsk: avoid data corruption on cq descriptor number
Since commit 30f241f ("xsk: Fix immature cq descriptor production"), the
descriptor number is stored in the skb control block and
xsk_cq_submit_addr_locked() relies on it to put the umem addrs onto the
pool's completion queue. The skb control block shouldn't be used for this
purpose: once the skb has been transmitted, xsk no longer has control over
it and other subsystems could use it. This leads to the following kernel
panic due to a NULL pointer dereference.

BUG: kernel NULL pointer dereference, address: 0000000000000000
#PF: supervisor read access in kernel mode
#PF: error_code(0x0000) - not-present page
PGD 0 P4D 0
Oops: Oops: 0000 [#1] SMP NOPTI
CPU: 2 UID: 1 PID: 927 Comm: p4xsk.bin Not tainted 6.16.12+deb14-cloud-amd64 #1 PREEMPT(lazy) Debian 6.16.12-1
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.17.0-debian-1.17.0-1 04/01/2014
RIP: 0010:xsk_destruct_skb+0xd0/0x180
[...]
Call Trace:
<IRQ>
? napi_complete_done+0x7a/0x1a0
ip_rcv_core+0x1bb/0x340
ip_rcv+0x30/0x1f0
__netif_receive_skb_one_core+0x85/0xa0
process_backlog+0x87/0x130
__napi_poll+0x28/0x180
net_rx_action+0x339/0x420
handle_softirqs+0xdc/0x320
? handle_edge_irq+0x90/0x1e0
do_softirq.part.0+0x3b/0x60
</IRQ>
<TASK>
__local_bh_enable_ip+0x60/0x70
__dev_direct_xmit+0x14e/0x1f0
__xsk_generic_xmit+0x482/0xb70
? __remove_hrtimer+0x41/0xa0
? __xsk_generic_xmit+0x51/0xb70
? _raw_spin_unlock_irqrestore+0xe/0x40
xsk_sendmsg+0xda/0x1c0
__sys_sendto+0x1ee/0x200
__x64_sys_sendto+0x24/0x30
do_syscall_64+0x84/0x2f0
? __pfx_pollwake+0x10/0x10
? __rseq_handle_notify_resume+0xad/0x4c0
? restore_fpregs_from_fpstate+0x3c/0x90
? switch_fpu_return+0x5b/0xe0
? do_syscall_64+0x204/0x2f0
? do_syscall_64+0x204/0x2f0
? do_syscall_64+0x204/0x2f0
entry_SYSCALL_64_after_hwframe+0x76/0x7e
</TASK>
[...]
Kernel panic - not syncing: Fatal exception in interrupt
Kernel Offset: 0x1c000000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff)

Use the skb XDP extension to store the number of cq descriptors along with
a list of umem addresses.

Fixes: 30f241f ("xsk: Fix immature cq descriptor production")
Closes: https://lore.kernel.org/netdev/[email protected]/
Signed-off-by: Fernando Fernandez Mancera <[email protected]>
1 parent 103cc0e · commit 334e25f

File tree: 1 file changed, +60 −21 lines changed


net/xdp/xsk.c

Lines changed: 60 additions & 21 deletions
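As a reading aid for the diff below: the patch reads two fields off the new skb extension, num_descs and addrs_list, alongside the existing struct xsk_addr_node. Here is a minimal sketch of the layouts those accesses imply. It is inferred from this diff alone; the authoritative struct xdp_skb_ext definition and the SKB_EXT_XDP id are added elsewhere in the series and are not shown here.

	/* Sketch inferred from the accesses in this diff; not the
	 * authoritative definition, which this diff does not contain.
	 */
	struct xdp_skb_ext {
		u32 num_descs;			/* cq descriptors backing this skb */
		struct list_head addrs_list;	/* extra umem addrs, one xsk_addr_node each */
	};

	/* Already present in net/xdp/xsk.c per the diff context: */
	struct xsk_addr_node {
		u64 addr;			/* umem address of one tx descriptor */
		struct list_head addr_node;	/* linked into ext->addrs_list */
	};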
@@ -41,15 +41,8 @@ struct xsk_addr_node {
 	struct list_head addr_node;
 };
 
-struct xsk_addr_head {
-	u32 num_descs;
-	struct list_head addrs_list;
-};
-
 static struct kmem_cache *xsk_tx_generic_cache;
 
-#define XSKCB(skb) ((struct xsk_addr_head *)((skb)->cb))
-
 void xsk_set_rx_need_wakeup(struct xsk_buff_pool *pool)
 {
 	if (pool->cached_need_wakeup & XDP_WAKEUP_RX)
@@ -562,6 +555,7 @@ static void xsk_cq_submit_addr_locked(struct xsk_buff_pool *pool,
 				      struct sk_buff *skb)
 {
 	struct xsk_addr_node *pos, *tmp;
+	struct xdp_skb_ext *ext;
 	u32 descs_processed = 0;
 	unsigned long flags;
 	u32 idx;
@@ -573,14 +567,16 @@ static void xsk_cq_submit_addr_locked(struct xsk_buff_pool *pool,
 			    (u64)(uintptr_t)skb_shinfo(skb)->destructor_arg);
 	descs_processed++;
 
-	if (unlikely(XSKCB(skb)->num_descs > 1)) {
-		list_for_each_entry_safe(pos, tmp, &XSKCB(skb)->addrs_list, addr_node) {
+	ext = skb_ext_find(skb, SKB_EXT_XDP);
+	if (unlikely(ext && ext->num_descs > 1)) {
+		list_for_each_entry_safe(pos, tmp, &ext->addrs_list, addr_node) {
 			xskq_prod_write_addr(pool->cq, idx + descs_processed,
 					     pos->addr);
 			descs_processed++;
 			list_del(&pos->addr_node);
 			kmem_cache_free(xsk_tx_generic_cache, pos);
 		}
+		skb_ext_del(skb, SKB_EXT_XDP);
 	}
 	xskq_prod_submit_n(pool->cq, descs_processed);
 	spin_unlock_irqrestore(&pool->cq_lock, flags);
@@ -597,12 +593,19 @@ static void xsk_cq_cancel_locked(struct xsk_buff_pool *pool, u32 n)
 
 static void xsk_inc_num_desc(struct sk_buff *skb)
 {
-	XSKCB(skb)->num_descs++;
+	struct xdp_skb_ext *ext;
+
+	ext = skb_ext_find(skb, SKB_EXT_XDP);
+	if (ext)
+		ext->num_descs++;
 }
 
 static u32 xsk_get_num_desc(struct sk_buff *skb)
 {
-	return XSKCB(skb)->num_descs;
+	struct xdp_skb_ext *ext;
+
+	ext = skb_ext_find(skb, SKB_EXT_XDP);
+	return (ext && ext->num_descs > 1) ? ext->num_descs : 1;
 }
 
 static void xsk_destruct_skb(struct sk_buff *skb)
@@ -621,12 +624,9 @@ static void xsk_destruct_skb(struct sk_buff *skb)
 static void xsk_skb_init_misc(struct sk_buff *skb, struct xdp_sock *xs,
 			      u64 addr)
 {
-	BUILD_BUG_ON(sizeof(struct xsk_addr_head) > sizeof(skb->cb));
-	INIT_LIST_HEAD(&XSKCB(skb)->addrs_list);
 	skb->dev = xs->dev;
 	skb->priority = READ_ONCE(xs->sk.sk_priority);
 	skb->mark = READ_ONCE(xs->sk.sk_mark);
-	XSKCB(skb)->num_descs = 0;
 	skb->destructor = xsk_destruct_skb;
 	skb_shinfo(skb)->destructor_arg = (void *)(uintptr_t)addr;
 }
@@ -636,12 +636,15 @@ static void xsk_consume_skb(struct sk_buff *skb)
 	struct xdp_sock *xs = xdp_sk(skb->sk);
 	u32 num_descs = xsk_get_num_desc(skb);
 	struct xsk_addr_node *pos, *tmp;
+	struct xdp_skb_ext *ext;
 
-	if (unlikely(num_descs > 1)) {
-		list_for_each_entry_safe(pos, tmp, &XSKCB(skb)->addrs_list, addr_node) {
+	ext = skb_ext_find(skb, SKB_EXT_XDP);
+	if (unlikely(ext && ext->num_descs > 1)) {
+		list_for_each_entry_safe(pos, tmp, &ext->addrs_list, addr_node) {
 			list_del(&pos->addr_node);
 			kmem_cache_free(xsk_tx_generic_cache, pos);
 		}
+		skb_ext_del(skb, SKB_EXT_XDP);
 	}
 
 	skb->destructor = sock_wfree;
@@ -727,16 +730,32 @@ static struct sk_buff *xsk_build_skb_zerocopy(struct xdp_sock *xs,
 			return ERR_PTR(err);
 		}
 	} else {
+		struct xdp_skb_ext *ext;
+
 		xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, GFP_KERNEL);
 		if (!xsk_addr)
 			return ERR_PTR(-ENOMEM);
 
+		ext = skb_ext_find(skb, SKB_EXT_XDP);
+		if (!ext) {
+			ext = skb_ext_add(skb, SKB_EXT_XDP);
+			if (!ext)
+				return ERR_PTR(-ENOMEM);
+			memset(ext, 0, sizeof(*ext));
+			INIT_LIST_HEAD(&ext->addrs_list);
+			ext->num_descs = 1;
+		} else if (ext->num_descs == 0) {
+			INIT_LIST_HEAD(&ext->addrs_list);
+			ext->num_descs = 1;
+		}
+
 		/* in case of -EOVERFLOW that could happen below,
 		 * xsk_consume_skb() will release this node as whole skb
 		 * would be dropped, which implies freeing all list elements
 		 */
 		xsk_addr->addr = desc->addr;
-		list_add_tail(&xsk_addr->addr_node, &XSKCB(skb)->addrs_list);
+		list_add_tail(&xsk_addr->addr_node, &ext->addrs_list);
+		xsk_inc_num_desc(skb);
 	}
 
 	len = desc->len;
@@ -804,6 +823,11 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
 		if (unlikely(err))
 			goto free_err;
 
+		if (!skb_ext_add(skb, SKB_EXT_XDP)) {
+			err = -ENOMEM;
+			goto free_err;
+		}
+
 		xsk_skb_init_misc(skb, xs, desc->addr);
 		if (desc->options & XDP_TX_METADATA) {
 			err = xsk_skb_metadata(skb, buffer, desc,
@@ -814,6 +838,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
 	} else {
 		int nr_frags = skb_shinfo(skb)->nr_frags;
 		struct xsk_addr_node *xsk_addr;
+		struct xdp_skb_ext *ext;
 		struct page *page;
 		u8 *vaddr;
 
@@ -828,6 +853,22 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
 			goto free_err;
 		}
 
+		ext = skb_ext_find(skb, SKB_EXT_XDP);
+		if (!ext) {
+			ext = skb_ext_add(skb, SKB_EXT_XDP);
+			if (!ext) {
+				__free_page(page);
+				err = -ENOMEM;
+				goto free_err;
+			}
+			memset(ext, 0, sizeof(*ext));
+			INIT_LIST_HEAD(&ext->addrs_list);
+			ext->num_descs = 1;
+		} else if (ext->num_descs == 0) {
+			INIT_LIST_HEAD(&ext->addrs_list);
+			ext->num_descs = 1;
+		}
+
 		xsk_addr = kmem_cache_zalloc(xsk_tx_generic_cache, GFP_KERNEL);
 		if (!xsk_addr) {
 			__free_page(page);
@@ -843,12 +884,11 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
 		refcount_add(PAGE_SIZE, &xs->sk.sk_wmem_alloc);
 
 		xsk_addr->addr = desc->addr;
-		list_add_tail(&xsk_addr->addr_node, &XSKCB(skb)->addrs_list);
+		list_add_tail(&xsk_addr->addr_node, &ext->addrs_list);
+		xsk_inc_num_desc(skb);
 		}
 	}
 
-	xsk_inc_num_desc(skb);
-
 	return skb;
 
 free_err:
@@ -857,7 +897,6 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
 
 	if (err == -EOVERFLOW) {
 		/* Drop the packet */
-		xsk_inc_num_desc(xs->skb);
 		xsk_drop_skb(xs->skb);
 		xskq_cons_release(xs->tx);
 	} else {
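Note that the find-or-add-and-init sequence for the extension appears twice above, once in xsk_build_skb_zerocopy() and once in the fragment path of xsk_build_skb(). A hypothetical helper that would factor out that repetition, shown only as a sketch of the pattern; the name xsk_skb_ext_prepare() is invented here and is not part of the patch:

	/* Hypothetical helper, not in the patch: return the skb's XDP
	 * extension, allocating and initializing it on first use. Callers
	 * must still unwind their own resources (e.g. __free_page()) when
	 * this returns NULL.
	 */
	static struct xdp_skb_ext *xsk_skb_ext_prepare(struct sk_buff *skb)
	{
		struct xdp_skb_ext *ext;

		ext = skb_ext_find(skb, SKB_EXT_XDP);
		if (!ext) {
			ext = skb_ext_add(skb, SKB_EXT_XDP);
			if (!ext)
				return NULL;
			memset(ext, 0, sizeof(*ext));
		}
		if (ext->num_descs == 0) {
			INIT_LIST_HEAD(&ext->addrs_list);
			ext->num_descs = 1;
		}
		return ext;
	}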
