diff --git a/src/libraries/System.Private.CoreLib/src/System/Array.cs b/src/libraries/System.Private.CoreLib/src/System/Array.cs
index cb42dbd6159a75..d7eebc759d7961 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Array.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Array.cs
@@ -1722,7 +1722,8 @@ public static void Reverse<T>(T[] array)
         {
             if (array == null)
                 ThrowHelper.ThrowArgumentNullException(ExceptionArgument.array);
-            Reverse(array, 0, array.Length);
+            if (array.Length > 1)
+                SpanHelpers.Reverse(ref MemoryMarshal.GetArrayDataReference(array), (nuint)array.Length);
         }
 
         public static void Reverse<T>(T[] array, int index, int length)
@@ -1739,16 +1740,7 @@ public static void Reverse<T>(T[] array, int index, int length)
             if (length <= 1)
                 return;
 
-            ref T first = ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(array), index);
-            ref T last = ref Unsafe.Add(ref Unsafe.Add(ref first, length), -1);
-            do
-            {
-                T temp = first;
-                first = last;
-                last = temp;
-                first = ref Unsafe.Add(ref first, 1);
-                last = ref Unsafe.Add(ref last, -1);
-            } while (Unsafe.IsAddressLessThan(ref first, ref last));
+            SpanHelpers.Reverse(ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(array), index), (nuint)length);
         }
 
         // Sorts the elements of an array. The sort compares the elements to each
diff --git a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs
index 3e0ff6e492a5d5..2d8ae3f523a43c 100644
--- a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs
@@ -6,6 +6,8 @@
 using System.Diagnostics;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
 
 namespace System
 {
@@ -1543,21 +1545,10 @@ ref MemoryMarshal.GetReference(value),
         /// </summary>
         public static void Reverse<T>(this Span<T> span)
         {
-            if (span.Length <= 1)
+            if (span.Length > 1)
             {
-                return;
+                SpanHelpers.Reverse(ref MemoryMarshal.GetReference(span), (nuint)span.Length);
             }
-
-            ref T first = ref MemoryMarshal.GetReference(span);
-            ref T last = ref Unsafe.Add(ref Unsafe.Add(ref first, span.Length), -1);
-            do
-            {
-                T temp = first;
-                first = last;
-                last = temp;
-                first = ref Unsafe.Add(ref first, 1);
-                last = ref Unsafe.Add(ref last, -1);
-            } while (Unsafe.IsAddressLessThan(ref first, ref last));
         }
 
         /// <summary>
diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs
index e8fca14efa52a2..58bf4c6030486e 100644
--- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs
@@ -2238,5 +2238,104 @@ private static uint FindFirstMatchedLane(Vector128<byte> compareResult)
             // Find the first lane that is set inside compareResult.
             return (uint)BitOperations.TrailingZeroCount(selectedLanes) >> 2;
         }
+
+        /// <summary>
+        /// Reverses, in place, the <paramref name="length"/> bytes starting at <paramref name="buf"/>.
+        /// </summary>
+        /// <param name="buf">Reference to the first byte of the range to reverse.</param>
+        /// <param name="length">Number of bytes in the range.</param>
+        public static void Reverse(ref byte buf, nuint length)
+        {
+            if (Avx2.IsSupported && (nuint)Vector256<byte>.Count * 2 <= length)
+            {
+                Vector256<byte> reverseMask = Vector256.Create(
+                    (byte)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, // first 128-bit lane
+                    15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); // second 128-bit lane
+                nuint numElements = (nuint)Vector256<byte>.Count;
+                nuint numIters = (length / numElements) / 2;
+                for (nuint i = 0; i < numIters; i++)
+                {
+                    nuint firstOffset = i * numElements;
+                    nuint lastOffset = length - ((1 + i) * numElements);
+
+                    // Load in values from beginning and end of the array.
+                    Vector256<byte> tempFirst = Vector256.LoadUnsafe(ref buf, firstOffset);
+                    Vector256<byte> tempLast = Vector256.LoadUnsafe(ref buf, lastOffset);
+
+                    // Avx2 operates on two 128-bit lanes rather than the full 256-bit vector.
+                    // Perform a shuffle to reverse each 128-bit lane, then permute to finish reversing the vector:
+                    //     +-------------------------------------------------------------------------------+
+                    //     | A1 | B1 | C1 | D1 | E1 | F1 | G1 | H1 | I1 | J1 | K1 | L1 | M1 | N1 | O1 | P1 |
+                    //     +-------------------------------------------------------------------------------+
+                    //     | A2 | B2 | C2 | D2 | E2 | F2 | G2 | H2 | I2 | J2 | K2 | L2 | M2 | N2 | O2 | P2 |
+                    //     +-------------------------------------------------------------------------------+
+                    //         Shuffle --->
+                    //     +-------------------------------------------------------------------------------+
+                    //     | P1 | O1 | N1 | M1 | L1 | K1 | J1 | I1 | H1 | G1 | F1 | E1 | D1 | C1 | B1 | A1 |
+                    //     +-------------------------------------------------------------------------------+
+                    //     | P2 | O2 | N2 | M2 | L2 | K2 | J2 | I2 | H2 | G2 | F2 | E2 | D2 | C2 | B2 | A2 |
+                    //     +-------------------------------------------------------------------------------+
+                    //         Permute --->
+                    //     +-------------------------------------------------------------------------------+
+                    //     | P2 | O2 | N2 | M2 | L2 | K2 | J2 | I2 | H2 | G2 | F2 | E2 | D2 | C2 | B2 | A2 |
+                    //     +-------------------------------------------------------------------------------+
+                    //     | P1 | O1 | N1 | M1 | L1 | K1 | J1 | I1 | H1 | G1 | F1 | E1 | D1 | C1 | B1 | A1 |
+                    //     +-------------------------------------------------------------------------------+
+                    tempFirst = Avx2.Shuffle(tempFirst, reverseMask);
+                    tempFirst = Avx2.Permute2x128(tempFirst, tempFirst, 0b00_01);
+                    tempLast = Avx2.Shuffle(tempLast, reverseMask);
+                    tempLast = Avx2.Permute2x128(tempLast, tempLast, 0b00_01);
+
+                    // Store the reversed vectors
+                    tempLast.StoreUnsafe(ref buf, firstOffset);
+                    tempFirst.StoreUnsafe(ref buf, lastOffset);
+                }
+                // Skip the bytes already swapped at the front and drop the ones handled at both ends from the length.
+                buf = ref Unsafe.Add(ref buf, numIters * numElements);
+                length -= numIters * numElements * 2;
+            }
+            else if (Ssse3.IsSupported && (nuint)Vector128<byte>.Count * 2 <= length)
+            {
+                // Ssse3.Shuffle below needs SSSE3 hardware support, so checking Sse2 alone is not enough.
+                Vector128<byte> reverseMask = Vector128.Create((byte)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+                nuint numElements = (nuint)Vector128<byte>.Count;
+                nuint numIters = (length / numElements) / 2;
+                for (nuint i = 0; i < numIters; i++)
+                {
+                    nuint firstOffset = i * numElements;
+                    nuint lastOffset = length - ((1 + i) * numElements);
+
+                    // Load in values from beginning and end of the array.
+                    Vector128<byte> tempFirst = Vector128.LoadUnsafe(ref buf, firstOffset);
+                    Vector128<byte> tempLast = Vector128.LoadUnsafe(ref buf, lastOffset);
+
+                    // Shuffle to reverse each vector:
+                    //     +---------------------------------------------------------------+
+                    //     | A | B | C | D | E | F | G | H | I | J | K | L | M | N | O | P |
+                    //     +---------------------------------------------------------------+
+                    //          --->
+                    //     +---------------------------------------------------------------+
+                    //     | P | O | N | M | L | K | J | I | H | G | F | E | D | C | B | A |
+                    //     +---------------------------------------------------------------+
+                    tempFirst = Ssse3.Shuffle(tempFirst, reverseMask);
+                    tempLast = Ssse3.Shuffle(tempLast, reverseMask);
+
+                    // Store the reversed vectors
+                    tempLast.StoreUnsafe(ref buf, firstOffset);
+                    tempFirst.StoreUnsafe(ref buf, lastOffset);
+                }
+                // Skip the bytes already swapped at the front and drop the ones handled at both ends from the length.
+                buf = ref Unsafe.Add(ref buf, numIters * numElements);
+                length -= numIters * numElements * 2;
+            }
+
+            // Store any remaining values one-by-one
+            for (nuint i = 0; i < (length / 2); i++)
+            {
+                ref byte first = ref Unsafe.Add(ref buf, i);
+                ref byte last = ref Unsafe.Add(ref buf, length - 1 - i);
+                (last, first) = (first, last);
+            }
+        }
     }
 }
diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs
index a662fe73e1ca4a..24994c6504138d 100644
--- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs
@@ -2015,5 +2015,101 @@ private static int FindFirstMatchedLane(Vector128<ushort> compareResult)
 
             return BitOperations.TrailingZeroCount(selectedLanes) >> 3;
         }
+
+        /// <summary>
+        /// Reverses, in place, the <paramref name="length"/> chars starting at <paramref name="buf"/>.
+        /// </summary>
+        /// <param name="buf">Reference to the first char of the range to reverse.</param>
+        /// <param name="length">Number of chars (not bytes) in the range.</param>
+        public static void Reverse(ref char buf, nuint length)
+        {
+            ref byte bufByte = ref Unsafe.As<char, byte>(ref buf);
+            nuint byteLength = length * sizeof(char);
+            if (Avx2.IsSupported && (nuint)Vector256<ushort>.Count * 2 <= length)
+            {
+                Vector256<byte> reverseMask = Vector256.Create(
+                    (byte)14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, // first 128-bit lane
+                    14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1); // second 128-bit lane
+                nuint numElements = (nuint)Vector256<byte>.Count;
+                nuint numIters = (byteLength / numElements) / 2;
+                for (nuint i = 0; i < numIters; i++)
+                {
+                    nuint firstOffset = i * numElements;
+                    nuint lastOffset = byteLength - ((1 + i) * numElements);
+
+                    // Load in values from beginning and end of the array.
+                    Vector256<byte> tempFirst = Vector256.LoadUnsafe(ref bufByte, firstOffset);
+                    Vector256<byte> tempLast = Vector256.LoadUnsafe(ref bufByte, lastOffset);
+
+                    // Avx2 operates on two 128-bit lanes rather than the full 256-bit vector.
+                    // Perform a shuffle to reverse each 128-bit lane, then permute to finish reversing the vector:
+                    //     +---------------------------------------------------------------+
+                    //     | A | B | C | D | E | F | G | H | I | J | K | L | M | N | O | P |
+                    //     +---------------------------------------------------------------+
+                    //         Shuffle --->
+                    //     +---------------------------------------------------------------+
+                    //     | H | G | F | E | D | C | B | A | P | O | N | M | L | K | J | I |
+                    //     +---------------------------------------------------------------+
+                    //         Permute --->
+                    //     +---------------------------------------------------------------+
+                    //     | P | O | N | M | L | K | J | I | H | G | F | E | D | C | B | A |
+                    //     +---------------------------------------------------------------+
+                    tempFirst = Avx2.Shuffle(tempFirst, reverseMask);
+                    tempFirst = Avx2.Permute2x128(tempFirst, tempFirst, 0b00_01);
+                    tempLast = Avx2.Shuffle(tempLast, reverseMask);
+                    tempLast = Avx2.Permute2x128(tempLast, tempLast, 0b00_01);
+
+                    // Store the reversed vectors
+                    tempLast.StoreUnsafe(ref bufByte, firstOffset);
+                    tempFirst.StoreUnsafe(ref bufByte, lastOffset);
+                }
+                // Skip the bytes already swapped at the front; length is tracked in chars, so subtract in 16-bit units.
+                bufByte = ref Unsafe.Add(ref bufByte, numIters * numElements);
+                length -= numIters * (nuint)Vector256<ushort>.Count * 2;
+            }
+            else if (Ssse3.IsSupported && (nuint)Vector128<ushort>.Count * 2 <= length)
+            {
+                // Ssse3.Shuffle below needs SSSE3 hardware support, so checking Sse2 alone is not enough.
+                Vector128<byte> reverseMask = Vector128.Create((byte)14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1);
+                nuint numElements = (nuint)Vector128<byte>.Count;
+                nuint numIters = ((length * sizeof(char)) / numElements) / 2;
+                for (nuint i = 0; i < numIters; i++)
+                {
+                    nuint firstOffset = i * numElements;
+                    nuint lastOffset = byteLength - ((1 + i) * numElements);
+
+                    // Load in values from beginning and end of the array.
+                    Vector128<byte> tempFirst = Vector128.LoadUnsafe(ref bufByte, firstOffset);
+                    Vector128<byte> tempLast = Vector128.LoadUnsafe(ref bufByte, lastOffset);
+
+                    // Shuffle to reverse each vector:
+                    //     +-------------------------------+
+                    //     | A | B | C | D | E | F | G | H |
+                    //     +-------------------------------+
+                    //          --->
+                    //     +-------------------------------+
+                    //     | H | G | F | E | D | C | B | A |
+                    //     +-------------------------------+
+                    tempFirst = Ssse3.Shuffle(tempFirst, reverseMask);
+                    tempLast = Ssse3.Shuffle(tempLast, reverseMask);
+
+                    // Store the reversed vectors
+                    tempLast.StoreUnsafe(ref bufByte, firstOffset);
+                    tempFirst.StoreUnsafe(ref bufByte, lastOffset);
+                }
+                // Skip the bytes already swapped at the front; length is tracked in chars, so subtract in 16-bit units.
+                bufByte = ref Unsafe.Add(ref bufByte, numIters * numElements);
+                length -= numIters * (nuint)Vector128<ushort>.Count * 2;
+            }
+
+            // Store any remaining values one-by-one
+            buf = ref Unsafe.As<byte, char>(ref bufByte);
+            for (nuint i = 0; i < (length / 2); i++)
+            {
+                ref char first = ref Unsafe.Add(ref buf, i);
+                ref char last = ref Unsafe.Add(ref buf, length - 1 - i);
+                (last, first) = (first, last);
+            }
+        }
     }
 }
diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs
index fba4f5cdbebcdc..07bf18d04e7eec 100644
--- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.cs
@@ -3,6 +3,9 @@
 
 using System.Diagnostics;
 using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
 
 namespace System
 {
@@ -403,5 +406,226 @@ public static unsafe void ClearWithReferences(ref IntPtr ip, nuint pointerSizeLe
             // Write only element.
             ip = default;
         }
+
+        /// <summary>
+        /// Reverses, in place, the <paramref name="length"/> 32-bit values starting at <paramref name="buf"/>.
+        /// </summary>
+        /// <param name="buf">Reference to the first value of the range to reverse.</param>
+        /// <param name="length">Number of 32-bit values in the range.</param>
+        public static void Reverse(ref int buf, nuint length)
+        {
+            if (Avx2.IsSupported && (nuint)Vector256<int>.Count * 2 <= length)
+            {
+                nuint numElements = (nuint)Vector256<int>.Count;
+                nuint numIters = (length / numElements) / 2;
+                Vector256<int> reverseMask = Vector256.Create(7, 6, 5, 4, 3, 2, 1, 0);
+                for (nuint i = 0; i < numIters; i++)
+                {
+                    nuint firstOffset = i * numElements;
+                    nuint lastOffset = length - ((1 + i) * numElements);
+
+                    // Load the values into vectors
+                    Vector256<int> tempFirst = Vector256.LoadUnsafe(ref buf, firstOffset);
+                    Vector256<int> tempLast = Vector256.LoadUnsafe(ref buf, lastOffset);
+
+                    // Permute to reverse each vector:
+                    //     +-------------------------------+
+                    //     | A | B | C | D | E | F | G | H |
+                    //     +-------------------------------+
+                    //         --->
+                    //     +-------------------------------+
+                    //     | H | G | F | E | D | C | B | A |
+                    //     +-------------------------------+
+                    tempFirst = Avx2.PermuteVar8x32(tempFirst, reverseMask);
+                    tempLast = Avx2.PermuteVar8x32(tempLast, reverseMask);
+
+                    // Store the values into final location
+                    tempLast.StoreUnsafe(ref buf, firstOffset);
+                    tempFirst.StoreUnsafe(ref buf, lastOffset);
+                }
+                // Skip the values already swapped at the front and drop the ones handled at both ends from the length.
+                buf = ref Unsafe.Add(ref buf, numIters * numElements);
+                length -= numIters * numElements * 2;
+            }
+            else if (Sse2.IsSupported && (nuint)Vector128<int>.Count * 2 <= length)
+            {
+                nuint numElements = (nuint)Vector128<int>.Count;
+                nuint numIters = (length / numElements) / 2;
+                for (nuint i = 0; i < numIters; i++)
+                {
+                    nuint firstOffset = i * numElements;
+                    nuint lastOffset = length - ((1 + i) * numElements);
+
+                    // Load the values into vectors
+                    Vector128<int> tempFirst = Vector128.LoadUnsafe(ref buf, firstOffset);
+                    Vector128<int> tempLast = Vector128.LoadUnsafe(ref buf, lastOffset);
+
+                    // Shuffle to reverse each vector:
+                    //     +---------------+
+                    //     | A | B | C | D |
+                    //     +---------------+
+                    //          --->
+                    //     +---------------+
+                    //     | D | C | B | A |
+                    //     +---------------+
+                    tempFirst = Sse2.Shuffle(tempFirst, 0b00_01_10_11);
+                    tempLast = Sse2.Shuffle(tempLast, 0b00_01_10_11);
+
+                    // Store the values into final location
+                    tempLast.StoreUnsafe(ref buf, firstOffset);
+                    tempFirst.StoreUnsafe(ref buf, lastOffset);
+                }
+                // Skip the values already swapped at the front and drop the ones handled at both ends from the length.
+                buf = ref Unsafe.Add(ref buf, numIters * numElements);
+                length -= numIters * numElements * 2;
+            }
+
+            // Store any remaining values one-by-one
+            for (nuint i = 0; i < (length / 2); i++)
+            {
+                ref int firstInt = ref Unsafe.Add(ref buf, i);
+                ref int lastInt = ref Unsafe.Add(ref buf, length - 1 - i);
+                (lastInt, firstInt) = (firstInt, lastInt);
+            }
+        }
+
+        /// <summary>
+        /// Reverses, in place, the <paramref name="length"/> 64-bit values starting at <paramref name="buf"/>.
+        /// </summary>
+        /// <param name="buf">Reference to the first value of the range to reverse.</param>
+        /// <param name="length">Number of 64-bit values in the range.</param>
+        public static void Reverse(ref long buf, nuint length)
+        {
+            if (Avx2.IsSupported && (nuint)Vector256<long>.Count * 2 <= length)
+            {
+                nuint numElements = (nuint)Vector256<long>.Count;
+                nuint numIters = (length / numElements) / 2;
+                for (nuint i = 0; i < numIters; i++)
+                {
+                    nuint firstOffset = i * numElements;
+                    nuint lastOffset = length - ((1 + i) * numElements);
+                    // Load the values into vectors
+                    Vector256<long> tempFirst = Vector256.LoadUnsafe(ref buf, firstOffset);
+                    Vector256<long> tempLast = Vector256.LoadUnsafe(ref buf, lastOffset);
+
+                    // Permute to reverse each vector:
+                    //     +---------------+
+                    //     | A | B | C | D |
+                    //     +---------------+
+                    //         --->
+                    //     +---------------+
+                    //     | D | C | B | A |
+                    //     +---------------+
+                    tempFirst = Avx2.Permute4x64(tempFirst, 0b00_01_10_11);
+                    tempLast = Avx2.Permute4x64(tempLast, 0b00_01_10_11);
+
+                    // Store the values into final location
+                    tempLast.StoreUnsafe(ref buf, firstOffset);
+                    tempFirst.StoreUnsafe(ref buf, lastOffset);
+                }
+                // Skip the values already swapped at the front and drop the ones handled at both ends from the length.
+                buf = ref Unsafe.Add(ref buf, numIters * numElements);
+                length -= numIters * numElements * 2;
+            }
+            else if (Sse2.IsSupported && (nuint)Vector128<long>.Count * 2 <= length)
+            {
+                ref int bufInt = ref Unsafe.As<long, int>(ref buf);
+                nuint intLength = length * (sizeof(long) / sizeof(int));
+                nuint numElements = (nuint)Vector128<int>.Count;
+                nuint numIters = (intLength / numElements) / 2;
+                for (nuint i = 0; i < numIters; i++)
+                {
+                    nuint firstOffset = i * numElements;
+                    nuint lastOffset = intLength - ((1 + i) * numElements);
+                    // Load the values into vectors
+                    Vector128<int> tempFirst = Vector128.LoadUnsafe(ref bufInt, firstOffset);
+                    Vector128<int> tempLast = Vector128.LoadUnsafe(ref bufInt, lastOffset);
+
+                    // Shuffle to reverse each vector:
+                    //     +-------+
+                    //     | A | B |
+                    //     +-------+
+                    //          --->
+                    //     +-------+
+                    //     | B | A |
+                    //     +-------+
+                    tempFirst = Sse2.Shuffle(tempFirst, 0b0100_1110);
+                    tempLast = Sse2.Shuffle(tempLast, 0b0100_1110);
+
+                    // Store the values into final location
+                    tempLast.StoreUnsafe(ref bufInt, firstOffset);
+                    tempFirst.StoreUnsafe(ref bufInt, lastOffset);
+                }
+                // Offsets above are in ints; length is tracked in longs, so subtract in 64-bit units.
+                bufInt = ref Unsafe.Add(ref bufInt, numIters * numElements);
+                buf = ref Unsafe.As<int, long>(ref bufInt);
+                length -= numIters * (nuint)Vector128<long>.Count * 2;
+            }
+
+            // Store any remaining values one-by-one
+            for (nuint i = 0; i < (length / 2); i++)
+            {
+                ref long firstLong = ref Unsafe.Add(ref buf, i);
+                ref long lastLong = ref Unsafe.Add(ref buf, length - 1 - i);
+                (lastLong, firstLong) = (firstLong, lastLong);
+            }
+        }
+
+        /// <summary>
+        /// Reverses, in place, the <paramref name="length"/> elements starting at <paramref name="elements"/>.
+        /// </summary>
+        /// <remarks>
+        /// Element types that contain no references and are 1, 2, 4 or 8 bytes wide are reinterpreted as
+        /// byte, char, int or long and sent to the matching overload; every other type is swapped one by one.
+        /// </remarks>
+        /// <param name="elements">Reference to the first element of the range to reverse.</param>
+        /// <param name="length">Number of elements in the range; asserted to be non-zero in debug builds.</param>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static void Reverse<T>(ref T elements, nuint length)
+        {
+            Debug.Assert(length > 0);
+            if (!RuntimeHelpers.IsReferenceOrContainsReferences<T>())
+            {
+                if (Unsafe.SizeOf<T>() == sizeof(byte))
+                {
+                    Reverse(ref Unsafe.As<T, byte>(ref elements), length);
+                    return;
+                }
+                else if (Unsafe.SizeOf<T>() == sizeof(char))
+                {
+                    Reverse(ref Unsafe.As<T, char>(ref elements), length);
+                    return;
+                }
+                else if (Unsafe.SizeOf<T>() == sizeof(int))
+                {
+                    Reverse(ref Unsafe.As<T, int>(ref elements), length);
+                    return;
+                }
+                else if (Unsafe.SizeOf<T>() == sizeof(long))
+                {
+                    Reverse(ref Unsafe.As<T, long>(ref elements), length);
+                    return;
+                }
+            }
+            ReverseInner(ref elements, length);
+        }
+
+        /// <summary>
+        /// Reverses the range by swapping elements pairwise from both ends; used when no vectorized overload applies.
+        /// </summary>
+        private static void ReverseInner<T>(ref T elements, nuint length)
+        {
+            Debug.Assert(length > 0);
+            ref T first = ref elements;
+            ref T last = ref Unsafe.Subtract(ref Unsafe.Add(ref first, (int)length), 1);
+            do
+            {
+                T temp = first;
+                first = last;
+                last = temp;
+                first = ref Unsafe.Add(ref first, 1);
+                last = ref Unsafe.Subtract(ref last, 1);
+            } while (Unsafe.IsAddressLessThan(ref first, ref last));
+        }
     }
 }