@@ -432,21 +432,6 @@ static void veth_set_multicast_list(struct net_device *dev)
 {
 }
 
-static struct sk_buff *veth_build_skb(void *head, int headroom, int len,
-				      int buflen)
-{
-	struct sk_buff *skb;
-
-	skb = build_skb(head, buflen);
-	if (!skb)
-		return NULL;
-
-	skb_reserve(skb, headroom);
-	skb_put(skb, len);
-
-	return skb;
-}
-
 static int veth_select_rxq(struct net_device *dev)
 {
 	return smp_processor_id() % dev->real_num_rx_queues;
@@ -694,72 +679,143 @@ static void veth_xdp_rcv_bulk_skb(struct veth_rq *rq, void **frames,
 	}
 }
 
-static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
-					struct sk_buff *skb,
-					struct veth_xdp_tx_bq *bq,
-					struct veth_stats *stats)
+static void veth_xdp_get(struct xdp_buff *xdp)
 {
-	u32 pktlen, headroom, act, metalen, frame_sz;
-	void *orig_data, *orig_data_end;
-	struct bpf_prog *xdp_prog;
-	int mac_len, delta, off;
-	struct xdp_buff xdp;
+	struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
+	int i;
 
-	skb_prepare_for_gro(skb);
+	get_page(virt_to_page(xdp->data));
+	if (likely(!xdp_buff_has_frags(xdp)))
+		return;
 
-	rcu_read_lock();
-	xdp_prog = rcu_dereference(rq->xdp_prog);
-	if (unlikely(!xdp_prog)) {
-		rcu_read_unlock();
-		goto out;
-	}
+	for (i = 0; i < sinfo->nr_frags; i++)
+		__skb_frag_ref(&sinfo->frags[i]);
+}
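
veth_xdp_get() takes one page reference on the head buffer and one per fragment, so XDP_TX and XDP_REDIRECT can keep the buffer alive after the wrapping skb is consumed further down. For context, here is a hedged sketch of the multi-buffer accessors it builds on, paraphrased from include/net/xdp.h as introduced by the same series (not verbatim; verify against the tree you build against):

	/* Sketch, not authoritative: the frags flag lives in xdp->flags, and
	 * the skb_shared_info describing the fragments sits in the tailroom
	 * of the head buffer (at xdp_data_hard_end(xdp)), which is why
	 * frame_sz always accounts for
	 * SKB_DATA_ALIGN(sizeof(struct skb_shared_info)).
	 */
	#define XDP_FLAGS_HAS_FRAGS	BIT(0)	/* non-linear xdp buff */

	static __always_inline bool xdp_buff_has_frags(struct xdp_buff *xdp)
	{
		return !!(xdp->flags & XDP_FLAGS_HAS_FRAGS);
	}

	static __always_inline void xdp_buff_set_frags_flag(struct xdp_buff *xdp)
	{
		xdp->flags |= XDP_FLAGS_HAS_FRAGS;
	}
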
 
-	mac_len = skb->data - skb_mac_header(skb);
-	pktlen = skb->len + mac_len;
-	headroom = skb_headroom(skb) - mac_len;
+static int veth_convert_xdp_buff_from_skb(struct veth_rq *rq,
+					  struct xdp_buff *xdp,
+					  struct sk_buff **pskb)
+{
+	struct sk_buff *skb = *pskb;
+	u32 frame_sz;
 
 	if (skb_shared(skb) || skb_head_is_locked(skb) ||
-	    skb_is_nonlinear(skb) || headroom < XDP_PACKET_HEADROOM) {
+	    skb_shinfo(skb)->nr_frags) {
+		u32 size, len, max_head_size, off;
 		struct sk_buff *nskb;
-		int size, head_off;
-		void *head, *start;
 		struct page *page;
+		int i, head_off;
 
-		size = SKB_DATA_ALIGN(VETH_XDP_HEADROOM + pktlen) +
-		       SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-		if (size > PAGE_SIZE)
+		/* We need a private copy of the skb and data buffers since
+		 * the eBPF program can modify it. We segment the original skb
+		 * into order-0 pages without linearizing it.
+		 *
+		 * Make sure we have enough space for the linear and paged areas.
+		 */
+		max_head_size = SKB_WITH_OVERHEAD(PAGE_SIZE -
+						  VETH_XDP_HEADROOM);
+		if (skb->len > PAGE_SIZE * MAX_SKB_FRAGS + max_head_size)
 			goto drop;
 
+		/* Allocate skb head */
 		page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
 		if (!page)
 			goto drop;
 
-		head = page_address(page);
-		start = head + VETH_XDP_HEADROOM;
-		if (skb_copy_bits(skb, -mac_len, start, pktlen)) {
-			page_frag_free(head);
+		nskb = build_skb(page_address(page), PAGE_SIZE);
+		if (!nskb) {
+			put_page(page);
 			goto drop;
 		}
 
-		nskb = veth_build_skb(head, VETH_XDP_HEADROOM + mac_len,
-				      skb->len, PAGE_SIZE);
-		if (!nskb) {
-			page_frag_free(head);
+		skb_reserve(nskb, VETH_XDP_HEADROOM);
+		size = min_t(u32, skb->len, max_head_size);
+		if (skb_copy_bits(skb, 0, nskb->data, size)) {
+			consume_skb(nskb);
 			goto drop;
 		}
+		skb_put(nskb, size);
 
 		skb_copy_header(nskb, skb);
 		head_off = skb_headroom(nskb) - skb_headroom(skb);
 		skb_headers_offset_update(nskb, head_off);
+
+		/* Allocate paged area of new skb */
+		off = size;
+		len = skb->len - off;
+
+		for (i = 0; i < MAX_SKB_FRAGS && off < skb->len; i++) {
+			page = alloc_page(GFP_ATOMIC | __GFP_NOWARN);
+			if (!page) {
+				consume_skb(nskb);
+				goto drop;
+			}
+
+			size = min_t(u32, len, PAGE_SIZE);
+			skb_add_rx_frag(nskb, i, page, 0, size, PAGE_SIZE);
+			if (skb_copy_bits(skb, off, page_address(page),
+					  size)) {
+				consume_skb(nskb);
+				goto drop;
+			}
+
+			len -= size;
+			off += size;
+		}
+
 		consume_skb(skb);
 		skb = nskb;
+	} else if (skb_headroom(skb) < XDP_PACKET_HEADROOM &&
+		   pskb_expand_head(skb, VETH_XDP_HEADROOM, 0, GFP_ATOMIC)) {
+		goto drop;
 	}
 
 	/* SKB "head" area always has tailroom for skb_shared_info */
 	frame_sz = skb_end_pointer(skb) - skb->head;
 	frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-	xdp_init_buff(&xdp, frame_sz, &rq->xdp_rxq);
-	xdp_prepare_buff(&xdp, skb->head, skb->mac_header, pktlen, true);
+	xdp_init_buff(xdp, frame_sz, &rq->xdp_rxq);
+	xdp_prepare_buff(xdp, skb->head, skb_headroom(skb),
+			 skb_headlen(skb), true);
+
+	if (skb_is_nonlinear(skb)) {
+		skb_shinfo(skb)->xdp_frags_size = skb->data_len;
+		xdp_buff_set_frags_flag(xdp);
+	} else {
+		xdp_buff_clear_frags_flag(xdp);
+	}
+	*pskb = skb;
+
+	return 0;
+drop:
+	consume_skb(skb);
+	*pskb = NULL;
+
+	return -ENOMEM;
+}
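
The drop threshold at the top of this copy path bounds what the rebuilt skb can carry: one order-0 page for the linear head, minus XDP headroom and skb_shared_info tailroom, plus at most MAX_SKB_FRAGS full pages. A userspace back-of-envelope check of those numbers; the overheads are hard-coded assumptions (4 KiB pages, 256-byte XDP headroom with NET_IP_ALIGN of 0, a 320-byte cacheline-aligned skb_shared_info, MAX_SKB_FRAGS of 17) rather than values read from kernel headers:

	/* budget.c - size budget of veth_convert_xdp_buff_from_skb(),
	 * computed with assumed x86-64-ish constants (see above).
	 */
	#include <stdio.h>

	#define PAGE_SIZE		4096u
	#define MAX_SKB_FRAGS		17u
	#define VETH_XDP_HEADROOM	256u	/* XDP_PACKET_HEADROOM + NET_IP_ALIGN */
	#define SHINFO_OVERHEAD		320u	/* SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) */
	#define SKB_WITH_OVERHEAD(x)	((x) - SHINFO_OVERHEAD)

	int main(void)
	{
		unsigned int max_head_size = SKB_WITH_OVERHEAD(PAGE_SIZE -
							       VETH_XDP_HEADROOM);
		unsigned int max_len = PAGE_SIZE * MAX_SKB_FRAGS + max_head_size;

		printf("max linear (head) bytes: %u\n", max_head_size); /* 3520 */
		printf("max accepted skb->len:   %u\n", max_len);       /* 73152 */
		return 0;
	}

That is roughly 71 KiB, comfortably above a 64 KiB GSO/GRO aggregate. Under the removed code the whole packet had to fit headroom, data, and shared info into a single page, so any sizable aggregate hit the size > PAGE_SIZE check and was dropped.
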
+
+static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
+					struct sk_buff *skb,
+					struct veth_xdp_tx_bq *bq,
+					struct veth_stats *stats)
+{
+	void *orig_data, *orig_data_end;
+	struct bpf_prog *xdp_prog;
+	struct xdp_buff xdp;
+	u32 act, metalen;
+	int off;
+
+	skb_prepare_for_gro(skb);
+
+	rcu_read_lock();
+	xdp_prog = rcu_dereference(rq->xdp_prog);
+	if (unlikely(!xdp_prog)) {
+		rcu_read_unlock();
+		goto out;
+	}
+
+	__skb_push(skb, skb->data - skb_mac_header(skb));
+	if (veth_convert_xdp_buff_from_skb(rq, &xdp, &skb))
+		goto drop;
 
 	orig_data = xdp.data;
 	orig_data_end = xdp.data_end;
@@ -770,7 +826,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 	case XDP_PASS:
 		break;
 	case XDP_TX:
-		get_page(virt_to_page(xdp.data));
+		veth_xdp_get(&xdp);
 		consume_skb(skb);
 		xdp.rxq->mem = rq->xdp_mem;
 		if (unlikely(veth_xdp_tx(rq, &xdp, bq) < 0)) {
@@ -782,7 +838,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 		rcu_read_unlock();
 		goto xdp_xmit;
 	case XDP_REDIRECT:
-		get_page(virt_to_page(xdp.data));
+		veth_xdp_get(&xdp);
 		consume_skb(skb);
 		xdp.rxq->mem = rq->xdp_mem;
 		if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) {
@@ -805,18 +861,24 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 	rcu_read_unlock();
 
 	/* check if bpf_xdp_adjust_head was used */
-	delta = orig_data - xdp.data;
-	off = mac_len + delta;
+	off = orig_data - xdp.data;
 	if (off > 0)
 		__skb_push(skb, off);
 	else if (off < 0)
 		__skb_pull(skb, -off);
-	skb->mac_header -= delta;
+
+	skb_reset_mac_header(skb);
 
 	/* check if bpf_xdp_adjust_tail was used */
 	off = xdp.data_end - orig_data_end;
 	if (off != 0)
 		__skb_put(skb, off); /* positive on grow, negative on shrink */
+
+	if (xdp_buff_has_frags(&xdp))
+		skb->data_len = skb_shinfo(skb)->xdp_frags_size;
+	else
+		skb->data_len = 0;
+
 	skb->protocol = eth_type_trans(skb, rq->dev);
 
 	metalen = xdp.data - xdp.data_meta;
@@ -832,7 +894,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 	return NULL;
 err_xdp:
 	rcu_read_unlock();
-	page_frag_free(xdp.data);
+	xdp_return_buff(&xdp);
 xdp_xmit:
 	return NULL;
 }
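
On the BPF side, only programs that declare multi-buffer support should ever see a frame with frags. With a recent libbpf this is expressed by placing the program in an xdp.frags section, which sets BPF_F_XDP_HAS_FRAGS at load time; the total frame length, fragments included, comes from bpf_xdp_get_buff_len(), the helper added alongside this series. A minimal, hedged sketch (section name and helper assumed from that series; verify against your kernel and libbpf versions):

	// SPDX-License-Identifier: GPL-2.0
	/* xdp_frags_pass.bpf.c - sketch of a frags-aware XDP program. */
	#include <linux/bpf.h>
	#include <linux/if_ether.h>
	#include <bpf/bpf_helpers.h>

	SEC("xdp.frags")
	int xdp_pass_mb(struct xdp_md *ctx)
	{
		void *data = (void *)(long)ctx->data;
		void *data_end = (void *)(long)ctx->data_end;

		/* data..data_end still spans only the linear head area; the
		 * full frame length, frags included, needs the helper.
		 */
		__u64 len = bpf_xdp_get_buff_len(ctx);

		if (data + sizeof(struct ethhdr) > data_end)
			return XDP_DROP;

		/* Example policy: drop anything above 16 KiB, pass the rest. */
		return len > 16 * 1024 ? XDP_DROP : XDP_PASS;
	}

	char _license[] SEC("license") = "GPL";

Attached in native mode to one peer of a veth pair (with iproute2, something along the lines of ip link set dev veth0 xdpdrv obj xdp_frags_pass.bpf.o sec xdp.frags), this exercises exactly the veth_xdp_rcv_skb() path above, including the new non-linear branch for GRO aggregates.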