99use std:: io:: Read ;
1010use std:: mem;
1111use std:: net:: Ipv4Addr ;
12+ use std:: num:: Wrapping ;
1213use std:: sync:: { Arc , Mutex } ;
1314
1415use libc:: EAGAIN ;
15- use log:: { error, warn } ;
16+ use log:: error;
1617use utils:: eventfd:: EventFd ;
1718use utils:: net:: mac:: MacAddr ;
18- use utils:: u64_to_usize;
19- use vm_memory:: GuestMemoryError ;
19+ use utils:: { u64_to_usize, usize_to_u64 } ;
20+ use vm_memory:: { GuestAddress , GuestMemory , GuestMemoryError } ;
2021
2122use crate :: devices:: virtio:: device:: { DeviceState , IrqTrigger , IrqType , VirtioDevice } ;
2223use crate :: devices:: virtio:: gen:: virtio_blk:: VIRTIO_F_VERSION_1 ;
2324use crate :: devices:: virtio:: gen:: virtio_net:: {
2425 virtio_net_hdr_v1, VIRTIO_NET_F_CSUM , VIRTIO_NET_F_GUEST_CSUM , VIRTIO_NET_F_GUEST_TSO4 ,
2526 VIRTIO_NET_F_GUEST_UFO , VIRTIO_NET_F_HOST_TSO4 , VIRTIO_NET_F_HOST_UFO , VIRTIO_NET_F_MAC ,
27+ VIRTIO_NET_F_MRG_RXBUF ,
2628} ;
2729use crate :: devices:: virtio:: gen:: virtio_ring:: VIRTIO_RING_F_EVENT_IDX ;
2830use crate :: devices:: virtio:: iovec:: IoVecBuffer ;
@@ -31,7 +33,7 @@ use crate::devices::virtio::net::tap::Tap;
3133use crate :: devices:: virtio:: net:: {
3234 gen, NetError , NetQueue , MAX_BUFFER_SIZE , NET_QUEUE_SIZES , RX_INDEX , TX_INDEX ,
3335} ;
34- use crate :: devices:: virtio:: queue:: { DescriptorChain , Queue } ;
36+ use crate :: devices:: virtio:: queue:: Queue ;
3537use crate :: devices:: virtio:: { ActivateError , TYPE_NET } ;
3638use crate :: devices:: { report_net_event_fail, DeviceError } ;
3739use crate :: dumbo:: pdu:: arp:: ETH_IPV4_FRAME_LEN ;
@@ -46,16 +48,14 @@ const FRAME_HEADER_MAX_LEN: usize = PAYLOAD_OFFSET + ETH_IPV4_FRAME_LEN;
4648
4749#[ derive( Debug , thiserror:: Error , displaydoc:: Display ) ]
4850enum FrontendError {
49- /// Add user.
50- AddUsed ,
51- /// Descriptor chain too mall.
52- DescriptorChainTooSmall ,
5351 /// Empty queue.
5452 EmptyQueue ,
5553 /// Guest memory error: {0}
5654 GuestMemory ( GuestMemoryError ) ,
57- /// Read only descriptor.
58- ReadOnlyDescriptor ,
55+ /// Attempt to write an empty packet.
56+ AttemptToWriteEmptyPacket ,
57+ /// Attempt to use more than 1 head to process a packet without VIRTIO_NET_F_MRG_RXBUF support.
58+ NoMrgBufSupport ,
5959}
6060
6161pub ( crate ) const fn vnet_hdr_len ( ) -> usize {
@@ -102,6 +102,20 @@ pub struct ConfigSpace {
102102// SAFETY: `ConfigSpace` contains only PODs in `repr(C)` or `repr(transparent)`, without padding.
103103unsafe impl ByteValued for ConfigSpace { }
104104
105+ // This struct contains information about partially
106+ // written packet.
107+ #[ derive( Debug ) ]
108+ struct PartialWrite {
109+ // Amount of bytes written so far.
110+ bytes_written : usize ,
111+ // Amount of descriptor heads used for the packet.
112+ used_heads : u16 ,
113+ // Guest address of the first buffer used for the packet.
114+ // This will be used to set number of descriptors heads used
115+ // to store the whole packet.
116+ packet_start_addr : GuestAddress ,
117+ }
118+
105119/// VirtIO network device.
106120///
107121/// It emulates a network device able to exchange L2 frames between the guest
@@ -126,6 +140,7 @@ pub struct Net {
126140
127141 rx_bytes_read : usize ,
128142 rx_frame_buf : [ u8 ; MAX_BUFFER_SIZE ] ,
143+ rx_partial_write : Option < PartialWrite > ,
129144
130145 tx_frame_headers : [ u8 ; frame_hdr_len ( ) ] ,
131146
@@ -159,6 +174,7 @@ impl Net {
159174 | 1 << VIRTIO_NET_F_HOST_TSO4
160175 | 1 << VIRTIO_NET_F_HOST_UFO
161176 | 1 << VIRTIO_F_VERSION_1
177+ | 1 << VIRTIO_NET_F_MRG_RXBUF
162178 | 1 << VIRTIO_RING_F_EVENT_IDX ;
163179
164180 let mut config_space = ConfigSpace :: default ( ) ;
@@ -188,6 +204,7 @@ impl Net {
188204 rx_deferred_frame : false ,
189205 rx_bytes_read : 0 ,
190206 rx_frame_buf : [ 0u8 ; MAX_BUFFER_SIZE ] ,
207+ rx_partial_write : None ,
191208 tx_frame_headers : [ 0u8 ; frame_hdr_len ( ) ] ,
192209 irq_trigger : IrqTrigger :: new ( ) . map_err ( NetError :: EventFd ) ?,
193210 config_space,
@@ -316,7 +333,7 @@ impl Net {
316333 }
317334
318335 // Attempt frame delivery.
319- let success = self . write_frame_to_guest ( ) ;
336+ let success = self . write_frame_to_guest ( ) . is_ok ( ) ;
320337
321338 // Undo the tokens consumption if guest delivery failed.
322339 if !success {
@@ -327,108 +344,149 @@ impl Net {
327344 success
328345 }
329346
330- /// Write a slice in a descriptor chain
331- ///
332- /// # Errors
333- ///
334- /// Returns an error if the descriptor chain is too short or
335- /// an inappropriate (read only) descriptor is found in the chain
336- fn write_to_descriptor_chain (
337- mem : & GuestMemoryMmap ,
338- data : & [ u8 ] ,
339- head : DescriptorChain ,
340- net_metrics : & NetDeviceMetrics ,
341- ) -> Result < ( ) , FrontendError > {
342- let mut chunk = data;
343- let mut next_descriptor = Some ( head) ;
347+ fn write_frame_to_guest ( & mut self ) -> Result < ( ) , FrontendError > {
348+ // This is safe since we checked in the event handler that the device is activated.
349+ let mem = self . device_state . mem ( ) . unwrap ( ) ;
350+
351+ // Check if queue is empty and early error out
352+ if self . queues [ RX_INDEX ] . is_empty ( mem) {
353+ return Err ( FrontendError :: EmptyQueue ) ;
354+ }
344355
345- while let Some ( descriptor) = & next_descriptor {
346- if !descriptor. is_write_only ( ) {
347- return Err ( FrontendError :: ReadOnlyDescriptor ) ;
356+ let actual_size = self . queues [ RX_INDEX ] . actual_size ( ) ;
357+
358+ let ( mut slice, mut packet_start_addr, mut used_heads, mut next_used) =
359+ if let Some ( pw) = & self . rx_partial_write {
360+ (
361+ & self . rx_frame_buf [ pw. bytes_written ..self . rx_bytes_read ] ,
362+ Some ( pw. packet_start_addr ) ,
363+ pw. used_heads ,
364+ self . queues [ RX_INDEX ] . next_used + Wrapping ( pw. used_heads ) ,
365+ )
366+ } else {
367+ (
368+ & self . rx_frame_buf [ ..self . rx_bytes_read ] ,
369+ None ,
370+ 0 ,
371+ self . queues [ RX_INDEX ] . next_used ,
372+ )
373+ } ;
374+
375+ while !self . queues [ RX_INDEX ] . is_empty ( mem) && !slice. is_empty ( ) {
376+ used_heads += 1 ;
377+
378+ if !self . has_feature ( u64:: from ( VIRTIO_NET_F_MRG_RXBUF ) ) && 1 < used_heads {
379+ return Err ( FrontendError :: NoMrgBufSupport ) ;
348380 }
349381
350- let len = std:: cmp:: min ( chunk. len ( ) , descriptor. len as usize ) ;
351- match mem. write_slice ( & chunk[ ..len] , descriptor. addr ) {
352- Ok ( ( ) ) => {
353- net_metrics. rx_count . inc ( ) ;
354- chunk = & chunk[ len..] ;
355- }
356- Err ( err) => {
357- error ! ( "Failed to write slice: {:?}" , err) ;
358- if let GuestMemoryError :: PartialBuffer { .. } = err {
359- net_metrics. rx_partial_writes . inc ( ) ;
360- }
361- return Err ( FrontendError :: GuestMemory ( err) ) ;
362- }
382+ // SAFETY:
383+ // This should never panic as we just checked that the queue is not empty.
384+ let mut desc = self . queues [ RX_INDEX ] . do_pop_unchecked ( mem) . unwrap ( ) ;
385+
386+ let head_desc_index = desc. index ;
387+ let mut desc_len = 0 ;
388+
389+ // If this is the first head of the packet, save it for later
390+ if packet_start_addr. is_none ( ) {
391+ packet_start_addr = Some ( desc. addr )
363392 }
364393
365- // If chunk is empty we are done here.
366- if chunk. is_empty ( ) {
367- let len = data. len ( ) as u64 ;
368- net_metrics. rx_bytes_count . add ( len) ;
369- net_metrics. rx_packets_count . inc ( ) ;
370- return Ok ( ( ) ) ;
394+ // Handle descriptor chain head
395+ let len = slice. len ( ) . min ( desc. len as usize ) ;
396+ mem. write_slice ( & slice[ ..len] , desc. addr )
397+ . map_err ( FrontendError :: GuestMemory ) ?;
398+ desc_len += len;
399+ slice = & slice[ len..] ;
400+
401+ // Handle following descriptors
402+ while desc. has_next ( ) && !slice. is_empty ( ) {
403+ // SAFETY:
404+ // This should never panic as we just checked there is next descriptor.
405+ desc = desc. next_descriptor ( ) . unwrap ( ) ;
406+
407+ let len = slice. len ( ) . min ( desc. len as usize ) ;
408+ mem. write_slice ( & slice[ ..len] , desc. addr )
409+ . map_err ( FrontendError :: GuestMemory ) ?;
410+ desc_len += len;
411+ slice = & slice[ len..] ;
371412 }
372413
373- next_descriptor = descriptor. next_descriptor ( ) ;
414+ // Add used descriptor head to used ring
415+ let next_used_index = next_used. 0 % actual_size;
416+ // SAFETY:
417+ // This should never panic as we provide index in
418+ // correct bounds.
419+ self . queues [ RX_INDEX ]
420+ . write_used_ring (
421+ mem,
422+ next_used_index,
423+ head_desc_index,
424+ u32:: try_from ( desc_len) . unwrap ( ) ,
425+ )
426+ . unwrap ( ) ;
427+
428+ // We don't update queues internal next_used value just yet.
429+ // This is done to prevent giving information to the guest
430+ // about descriptor heads we used for partial packets.
431+ next_used += Wrapping ( 1 ) ;
374432 }
375433
376- warn ! ( "Receiving buffer is too small to hold frame of current size" ) ;
377- Err ( FrontendError :: DescriptorChainTooSmall )
378- }
434+ let packet_start_addr =
435+ packet_start_addr. ok_or ( FrontendError :: AttemptToWriteEmptyPacket ) ?;
379436
380- // Copies a single frame from `self.rx_frame_buf` into the guest.
381- fn do_write_frame_to_guest ( & mut self ) -> Result < ( ) , FrontendError > {
382- // This is safe since we checked in the event handler that the device is activated.
383- let mem = self . device_state . mem ( ) . unwrap ( ) ;
384-
385- let queue = & mut self . queues [ RX_INDEX ] ;
386- let head_descriptor = queue. pop_or_enable_notification ( mem) . ok_or_else ( || {
387- self . metrics . no_rx_avail_buffer . inc ( ) ;
388- FrontendError :: EmptyQueue
389- } ) ?;
390- let head_index = head_descriptor. index ;
437+ if slice. is_empty ( ) {
438+ // Packet was fully written.
439+ self . metrics
440+ . rx_bytes_count
441+ . add ( usize_to_u64 ( self . rx_bytes_read ) ) ;
442+ self . metrics . rx_packets_count . inc ( ) ;
443+
444+ // TODO: can be simplified by usage of `std::mem::offset_of`
445+ // when Rust version is updated.
446+ //
447+ // Update number of descriptor heads used to store a packet.
448+ // SAFETY:
449+ // The packet_start_addr is valid guest address and we check
450+ // memory boundaries.
451+ #[ allow( clippy:: transmute_ptr_to_ref) ]
452+ let header: & mut virtio_net_hdr_v1 = unsafe {
453+ let header_slice = mem
454+ . get_slice ( packet_start_addr, std:: mem:: size_of :: < virtio_net_hdr_v1 > ( ) )
455+ . map_err ( FrontendError :: GuestMemory ) ?;
456+ std:: mem:: transmute ( header_slice. ptr_guard_mut ( ) . as_ptr ( ) )
457+ } ;
458+ header. num_buffers = used_heads;
391459
392- let result = Self :: write_to_descriptor_chain (
393- mem,
394- & self . rx_frame_buf [ ..self . rx_bytes_read ] ,
395- head_descriptor,
396- & self . metrics ,
397- ) ;
398- // Mark the descriptor chain as used. If an error occurred, skip the descriptor chain.
399- let used_len = if result. is_err ( ) {
400- self . metrics . rx_fails . inc ( ) ;
401- 0
402- } else {
403- // Safe to unwrap because a frame must be smaller than 2^16 bytes.
404- u32:: try_from ( self . rx_bytes_read ) . unwrap ( )
405- } ;
406- queue. add_used ( mem, head_index, used_len) . map_err ( |err| {
407- error ! ( "Failed to add available descriptor {}: {}" , head_index, err) ;
408- FrontendError :: AddUsed
409- } ) ?;
460+ // Update queues internals
461+ self . queues [ RX_INDEX ] . next_used = next_used;
462+ self . queues [ RX_INDEX ] . num_added += Wrapping ( used_heads) ;
410463
411- result
412- }
464+ // Update used ring with what we used to process the packet
465+ self . queues [ RX_INDEX ] . set_used_ring_idx ( next_used. 0 , mem) ;
466+ let next_avail = self . queues [ RX_INDEX ] . next_avail . 0 ;
467+ self . queues [ RX_INDEX ] . set_used_ring_avail_event ( next_avail, mem) ;
413468
414- // Copies a single frame from `self.rx_frame_buf` into the guest. In case of an error retries
415- // the operation if possible. Returns true if the operation was successfull.
416- fn write_frame_to_guest ( & mut self ) -> bool {
417- let max_iterations = self . queues [ RX_INDEX ] . actual_size ( ) ;
418- for _ in 0 ..max_iterations {
419- match self . do_write_frame_to_guest ( ) {
420- Ok ( ( ) ) => return true ,
421- Err ( FrontendError :: EmptyQueue ) | Err ( FrontendError :: AddUsed ) => {
422- return false ;
423- }
424- Err ( _) => {
425- // retry
426- continue ;
427- }
469+ // Clear partial write info if there was one
470+ self . rx_partial_write = None ;
471+ Ok ( ( ) )
472+ } else {
473+ // Packet could not be fully written to the guest
474+ // Save necessary info to use it during next invocation.
475+ self . metrics . rx_partial_writes . inc ( ) ;
476+
477+ if let Some ( pw) = & mut self . rx_partial_write {
478+ pw. bytes_written = self . rx_bytes_read - slice. len ( ) ;
479+ pw. used_heads = used_heads;
480+ } else {
481+ let pw = PartialWrite {
482+ bytes_written : self . rx_bytes_read - slice. len ( ) ,
483+ used_heads,
484+ packet_start_addr,
485+ } ;
486+ self . rx_partial_write = Some ( pw) ;
428487 }
488+ Err ( FrontendError :: EmptyQueue )
429489 }
430-
431- false
432490 }
433491
434492 // Tries to detour the frame to MMDS and if MMDS doesn't accept it, sends it on the host TAP.
0 commit comments