@@ -621,100 +621,41 @@ impl<T> [T] {
     #[stable(feature = "rust1", since = "1.0.0")]
     #[inline]
     pub fn reverse(&mut self) {
-        let mut i: usize = 0;
-        let ln = self.len();
-
-        // For very small types, all the individual reads in the normal
-        // path perform poorly. We can do better, given efficient unaligned
-        // load/store, by loading a larger chunk and reversing a register.
-
-        // Ideally LLVM would do this for us, as it knows better than we do
-        // whether unaligned reads are efficient (since that changes between
-        // different ARM versions, for example) and what the best chunk size
-        // would be. Unfortunately, as of LLVM 4.0 (2017-05) it only unrolls
-        // the loop, so we need to do this ourselves. (Hypothesis: reverse
-        // is troublesome because the sides can be aligned differently --
-        // will be, when the length is odd -- so there's no way of emitting
-        // pre- and postludes to use fully-aligned SIMD in the middle.)
-
-        let fast_unaligned = cfg!(any(target_arch = "x86", target_arch = "x86_64"));
-
-        if fast_unaligned && mem::size_of::<T>() == 1 {
-            // Use the llvm.bswap intrinsic to reverse u8s in a usize
-            let chunk = mem::size_of::<usize>();
-            while i + chunk - 1 < ln / 2 {
-                // SAFETY: There are several things to check here:
-                //
-                // - Note that `chunk` is either 4 or 8 due to the cfg check
-                //   above. So `chunk - 1` is positive.
-                // - Indexing with index `i` is fine as the loop check guarantees
-                //   `i + chunk - 1 < ln / 2`
-                //   <=> `i < ln / 2 - (chunk - 1) < ln / 2 < ln`.
-                // - Indexing with index `ln - i - chunk = ln - (i + chunk)` is fine:
-                //   - `i + chunk > 0` is trivially true.
-                //   - The loop check guarantees:
-                //     `i + chunk - 1 < ln / 2`
-                //     <=> `i + chunk ≤ ln / 2 ≤ ln`, thus subtraction does not underflow.
-                // - The `read_unaligned` and `write_unaligned` calls are fine:
-                //   - `pa` points to index `i` where `i < ln / 2 - (chunk - 1)`
-                //     (see above) and `pb` points to index `ln - i - chunk`, so
-                //     both are at least `chunk`
-                //     many bytes away from the end of `self`.
-                //   - Any initialized memory is valid `usize`.
-                unsafe {
-                    let ptr = self.as_mut_ptr();
-                    let pa = ptr.add(i);
-                    let pb = ptr.add(ln - i - chunk);
-                    let va = ptr::read_unaligned(pa as *mut usize);
-                    let vb = ptr::read_unaligned(pb as *mut usize);
-                    ptr::write_unaligned(pa as *mut usize, vb.swap_bytes());
-                    ptr::write_unaligned(pb as *mut usize, va.swap_bytes());
-                }
-                i += chunk;
-            }
-        }
+        let half_len = self.len() / 2;
+        let Range { start, end } = self.as_mut_ptr_range();
+
+        // These slices will skip the middle item for an odd length,
+        // since that one doesn't need to move.
+        let (front_half, back_half) =
+            // SAFETY: Both are subparts of the original slice, so the memory
+            // range is valid, and they don't overlap because they're each only
+            // half (or less) of the original slice.
+            unsafe {
+                (
+                    slice::from_raw_parts_mut(start, half_len),
+                    slice::from_raw_parts_mut(end.sub(half_len), half_len),
+                )
+            };

-        if fast_unaligned && mem::size_of::<T>() == 2 {
-            // Use rotate-by-16 to reverse u16s in a u32
-            let chunk = mem::size_of::<u32>() / 2;
-            while i + chunk - 1 < ln / 2 {
-                // SAFETY: An unaligned u32 can be read from `i` if `i + 1 < ln`
-                // (and obviously `i < ln`), because each element is 2 bytes and
-                // we're reading 4.
-                //
-                // `i + chunk - 1 < ln / 2`  # while condition
-                // `i + 2 - 1 < ln / 2`
-                // `i + 1 < ln / 2`
-                //
-                // Since it's less than the length divided by 2, then it must be
-                // in bounds.
-                //
-                // This also means that the condition `0 < i + chunk <= ln` is
-                // always respected, ensuring the `pb` pointer can be used
-                // safely.
-                unsafe {
-                    let ptr = self.as_mut_ptr();
-                    let pa = ptr.add(i);
-                    let pb = ptr.add(ln - i - chunk);
-                    let va = ptr::read_unaligned(pa as *mut u32);
-                    let vb = ptr::read_unaligned(pb as *mut u32);
-                    ptr::write_unaligned(pa as *mut u32, vb.rotate_left(16));
-                    ptr::write_unaligned(pb as *mut u32, va.rotate_left(16));
-                }
-                i += chunk;
-            }
-        }
+        // Introducing a function boundary here means that the two halves
+        // get `noalias` markers, allowing better optimization as LLVM
+        // knows that they're disjoint, unlike in the original slice.
+        revswap(front_half, back_half, half_len);

-        while i < ln / 2 {
-            // SAFETY: `i` is inferior to half the length of the slice so
-            // accessing `i` and `ln - i - 1` is safe (`i` starts at 0 and
-            // will not go further than `ln / 2 - 1`).
-            // The resulting pointers `pa` and `pb` are therefore valid and
-            // aligned, and can be read from and written to.
-            unsafe {
-                self.swap_unchecked(i, ln - i - 1);
+        #[inline]
+        fn revswap<T>(a: &mut [T], b: &mut [T], n: usize) {
+            debug_assert_eq!(a.len(), n);
+            debug_assert_eq!(b.len(), n);
+
+            // Because this function is first compiled in isolation,
+            // this check tells LLVM that the indexing below is
+            // in-bounds. Then after inlining -- once the actual
+            // lengths of the slices are known -- it's removed.
+            let (a, b) = (&mut a[..n], &mut b[..n]);
+
+            for i in 0..n {
+                mem::swap(&mut a[i], &mut b[n - 1 - i]);
             }
-            i += 1;
         }
     }

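A side note on the removed fast paths, since their comments lean on two integer identities: `swap_bytes` (the `llvm.bswap` intrinsic) reverses the byte order of an integer, so byte-swapping a `usize` loaded from consecutive `u8`s reverses those elements, and rotating a `u32` by 16 bits exchanges the two `u16` halves it contains. A minimal standalone check of both facts, not part of this commit and independent of endianness:

```rust
fn main() {
    // Eight u8 elements reversed at once: load them as a native-endian u64,
    // byte-swap, and store them back; the bytes come out in opposite order
    // whatever the target's endianness.
    let elems = [1u8, 2, 3, 4, 5, 6, 7, 8];
    let reversed = u64::from_ne_bytes(elems).swap_bytes().to_ne_bytes();
    assert_eq!(reversed, [8, 7, 6, 5, 4, 3, 2, 1]);

    // Two u16 elements reversed at once: a 16-bit rotation of the containing
    // u32 swaps its two halves, and therefore the two elements.
    assert_eq!(0xAAAA_BBBBu32.rotate_left(16), 0xBBBB_AAAAu32);
}
```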
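The replacement's key idea, per the comment above the `revswap` call, is that passing two provably disjoint `&mut` halves across a function boundary gives them `noalias` markers that a single slice cannot carry. The same structure can be sketched in safe code with `split_at_mut`; the helper names below are made up for illustration, and this is not the committed implementation (which uses raw pointers and keeps `revswap` nested inside `reverse`):

```rust
use std::mem;

// Hypothetical safe sketch of the disjoint-halves shape. Passing the halves
// into a separate function is what earns them `noalias` parameter markers,
// mirroring the `revswap` boundary in the diff.
fn reverse_by_halves<T>(s: &mut [T]) {
    let len = s.len();
    let half_len = len / 2;
    // `front` keeps the middle element for odd lengths; only its first
    // `half_len` items are swapped, mirroring `back`.
    let (front, back) = s.split_at_mut(len - half_len);
    swap_reversed(front, back, half_len);
}

fn swap_reversed<T>(a: &mut [T], b: &mut [T], n: usize) {
    // Swap a[0] with b[n-1], a[1] with b[n-2], and so on.
    for i in 0..n {
        mem::swap(&mut a[i], &mut b[n - 1 - i]);
    }
}

fn main() {
    let mut v = [1, 2, 3, 4, 5];
    reverse_by_halves(&mut v);
    assert_eq!(v, [5, 4, 3, 2, 1]);
}
```

Whether this safe variant compiles to the same code as the pointer-based version would need checking against the generated assembly; it is only meant to make the disjoint-halves-plus-function-boundary structure explicit.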