Skip to content

Commit de005e5

Browse files
authored
Revert "Improve Span.Reverse fast path performance (#70944)" (#78605)
This reverts commit 6ddd06c.
1 parent 3ebeb75 commit de005e5

File tree

3 files changed

+139
-206
lines changed

3 files changed

+139
-206
lines changed

src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs

Lines changed: 32 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
// Licensed to the .NET Foundation under one or more agreements.
22
// The .NET Foundation licenses this file to you under the MIT license.
33

4-
using System.Buffers.Binary;
54
using System.Diagnostics;
65
using System.Diagnostics.CodeAnalysis;
76
using System.Numerics;
@@ -1130,23 +1129,21 @@ private static unsafe nuint UnalignedCountVector128(ref byte searchSpace)
11301129

11311130
public static void Reverse(ref byte buf, nuint length)
11321131
{
1133-
Debug.Assert(length > 1);
1134-
1135-
nint remainder = (nint)length;
1136-
nint offset = 0;
1137-
1138-
if (Avx2.IsSupported && remainder >= Vector256<byte>.Count)
1132+
if (Avx2.IsSupported && (nuint)Vector256<byte>.Count * 2 <= length)
11391133
{
11401134
Vector256<byte> reverseMask = Vector256.Create(
11411135
(byte)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, // first 128-bit lane
11421136
15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); // second 128-bit lane
1143-
1144-
nint lastOffset = remainder - Vector256<byte>.Count;
1145-
do
1137+
nuint numElements = (nuint)Vector256<byte>.Count;
1138+
nuint numIters = (length / numElements) / 2;
1139+
for (nuint i = 0; i < numIters; i++)
11461140
{
1147-
// Load the values into vectors
1148-
Vector256<byte> tempFirst = Vector256.LoadUnsafe(ref buf, (nuint)offset);
1149-
Vector256<byte> tempLast = Vector256.LoadUnsafe(ref buf, (nuint)lastOffset);
1141+
nuint firstOffset = i * numElements;
1142+
nuint lastOffset = length - ((1 + i) * numElements);
1143+
1144+
// Load in values from beginning and end of the array.
1145+
Vector256<byte> tempFirst = Vector256.LoadUnsafe(ref buf, firstOffset);
1146+
Vector256<byte> tempLast = Vector256.LoadUnsafe(ref buf, lastOffset);
11501147

11511148
// Avx2 operates on two 128-bit lanes rather than the full 256-bit vector.
11521149
// Perform a shuffle to reverse each 128-bit lane, then permute to finish reversing the vector:
@@ -1173,23 +1170,24 @@ public static void Reverse(ref byte buf, nuint length)
11731170
tempLast = Avx2.Permute2x128(tempLast, tempLast, 0b00_01);
11741171

11751172
// Store the reversed vectors
1176-
tempLast.StoreUnsafe(ref buf, (nuint)offset);
1177-
tempFirst.StoreUnsafe(ref buf, (nuint)lastOffset);
1178-
1179-
offset += Vector256<byte>.Count;
1180-
lastOffset -= Vector256<byte>.Count;
1181-
} while (lastOffset >= offset);
1182-
1183-
remainder = lastOffset + Vector256<byte>.Count - offset;
1173+
tempLast.StoreUnsafe(ref buf, firstOffset);
1174+
tempFirst.StoreUnsafe(ref buf, lastOffset);
1175+
}
1176+
buf = ref Unsafe.Add(ref buf, numIters * numElements);
1177+
length -= numIters * numElements * 2;
11841178
}
1185-
else if (Vector128.IsHardwareAccelerated && remainder >= Vector128<byte>.Count)
1179+
else if (Vector128.IsHardwareAccelerated && (nuint)Vector128<byte>.Count * 2 <= length)
11861180
{
1187-
nint lastOffset = remainder - Vector128<byte>.Count;
1188-
do
1181+
nuint numElements = (nuint)Vector128<byte>.Count;
1182+
nuint numIters = (length / numElements) / 2;
1183+
for (nuint i = 0; i < numIters; i++)
11891184
{
1190-
// Load the values into vectors
1191-
Vector128<byte> tempFirst = Vector128.LoadUnsafe(ref buf, (nuint)offset);
1192-
Vector128<byte> tempLast = Vector128.LoadUnsafe(ref buf, (nuint)lastOffset);
1185+
nuint firstOffset = i * numElements;
1186+
nuint lastOffset = length - ((1 + i) * numElements);
1187+
1188+
// Load in values from beginning and end of the array.
1189+
Vector128<byte> tempFirst = Vector128.LoadUnsafe(ref buf, firstOffset);
1190+
Vector128<byte> tempLast = Vector128.LoadUnsafe(ref buf, lastOffset);
11931191

11941192
// Shuffle to reverse each vector:
11951193
// +---------------------------------------------------------------+
@@ -1205,58 +1203,15 @@ public static void Reverse(ref byte buf, nuint length)
12051203
(byte)15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0));
12061204

12071205
// Store the reversed vectors
1208-
tempLast.StoreUnsafe(ref buf, (nuint)offset);
1209-
tempFirst.StoreUnsafe(ref buf, (nuint)lastOffset);
1210-
1211-
offset += Vector128<byte>.Count;
1212-
lastOffset -= Vector128<byte>.Count;
1213-
} while (lastOffset >= offset);
1214-
1215-
remainder = lastOffset + Vector128<byte>.Count - offset;
1216-
}
1217-
1218-
if (remainder >= sizeof(long))
1219-
{
1220-
nint lastOffset = (nint)length - offset - sizeof(long);
1221-
do
1222-
{
1223-
long tempFirst = Unsafe.ReadUnaligned<long>(ref Unsafe.Add(ref buf, offset));
1224-
long tempLast = Unsafe.ReadUnaligned<long>(ref Unsafe.Add(ref buf, lastOffset));
1225-
1226-
// swap and store in reversed position
1227-
Unsafe.WriteUnaligned(ref Unsafe.Add(ref buf, offset), BinaryPrimitives.ReverseEndianness(tempLast));
1228-
Unsafe.WriteUnaligned(ref Unsafe.Add(ref buf, lastOffset), BinaryPrimitives.ReverseEndianness(tempFirst));
1229-
1230-
offset += sizeof(long);
1231-
lastOffset -= sizeof(long);
1232-
} while (lastOffset >= offset);
1233-
1234-
remainder = lastOffset + sizeof(long) - offset;
1235-
}
1236-
1237-
if (remainder >= sizeof(int))
1238-
{
1239-
nint lastOffset = (nint)length - offset - sizeof(int);
1240-
do
1241-
{
1242-
int tempFirst = Unsafe.ReadUnaligned<int>(ref Unsafe.Add(ref buf, offset));
1243-
int tempLast = Unsafe.ReadUnaligned<int>(ref Unsafe.Add(ref buf, lastOffset));
1244-
1245-
// swap and store in reversed position
1246-
Unsafe.WriteUnaligned(ref Unsafe.Add(ref buf, offset), BinaryPrimitives.ReverseEndianness(tempLast));
1247-
Unsafe.WriteUnaligned(ref Unsafe.Add(ref buf, lastOffset), BinaryPrimitives.ReverseEndianness(tempFirst));
1248-
1249-
offset += sizeof(int);
1250-
lastOffset -= sizeof(int);
1251-
} while (lastOffset >= offset);
1252-
1253-
remainder = lastOffset + sizeof(int) - offset;
1206+
tempLast.StoreUnsafe(ref buf, firstOffset);
1207+
tempFirst.StoreUnsafe(ref buf, lastOffset);
1208+
}
1209+
buf = ref Unsafe.Add(ref buf, numIters * numElements);
1210+
length -= numIters * numElements * 2;
12541211
}
12551212

1256-
if (remainder > 1)
1257-
{
1258-
ReverseInner(ref Unsafe.Add(ref buf, offset), (nuint)remainder);
1259-
}
1213+
// Store any remaining values one-by-one
1214+
ReverseInner(ref buf, length);
12601215
}
12611216
}
12621217
}

src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs

Lines changed: 38 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -733,25 +733,23 @@ private static unsafe nint UnalignedCountVector128(ref char searchSpace)
733733

734734
public static void Reverse(ref char buf, nuint length)
735735
{
736-
Debug.Assert(length > 1);
737-
738-
nint remainder = (nint)length;
739-
nint offset = 0;
740-
741-
if (Avx2.IsSupported && remainder >= Vector256<ushort>.Count)
736+
if (Avx2.IsSupported && (nuint)Vector256<short>.Count * 2 <= length)
742737
{
738+
ref byte bufByte = ref Unsafe.As<char, byte>(ref buf);
739+
nuint byteLength = length * sizeof(char);
743740
Vector256<byte> reverseMask = Vector256.Create(
744741
(byte)14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1, // first 128-bit lane
745742
14, 15, 12, 13, 10, 11, 8, 9, 6, 7, 4, 5, 2, 3, 0, 1); // second 128-bit lane
746-
747-
nint lastOffset = remainder - Vector256<ushort>.Count;
748-
do
743+
nuint numElements = (nuint)Vector256<byte>.Count;
744+
nuint numIters = (byteLength / numElements) / 2;
745+
for (nuint i = 0; i < numIters; i++)
749746
{
750-
ref byte first = ref Unsafe.As<char, byte>(ref Unsafe.Add(ref buf, offset));
751-
ref byte last = ref Unsafe.As<char, byte>(ref Unsafe.Add(ref buf, lastOffset));
747+
nuint firstOffset = i * numElements;
748+
nuint lastOffset = byteLength - ((1 + i) * numElements);
752749

753-
Vector256<byte> tempFirst = Vector256.LoadUnsafe(ref first);
754-
Vector256<byte> tempLast = Vector256.LoadUnsafe(ref last);
750+
// Load in values from beginning and end of the array.
751+
Vector256<byte> tempFirst = Vector256.LoadUnsafe(ref bufByte, firstOffset);
752+
Vector256<byte> tempLast = Vector256.LoadUnsafe(ref bufByte, lastOffset);
755753

756754
// Avx2 operates on two 128-bit lanes rather than the full 256-bit vector.
757755
// Perform a shuffle to reverse each 128-bit lane, then permute to finish reversing the vector:
@@ -772,25 +770,27 @@ public static void Reverse(ref char buf, nuint length)
772770
tempLast = Avx2.Permute2x128(tempLast, tempLast, 0b00_01);
773771

774772
// Store the reversed vectors
775-
tempLast.StoreUnsafe(ref first);
776-
tempFirst.StoreUnsafe(ref last);
777-
778-
offset += Vector256<ushort>.Count;
779-
lastOffset -= Vector256<ushort>.Count;
780-
} while (lastOffset >= offset);
781-
782-
remainder = (lastOffset + Vector256<ushort>.Count - offset);
773+
tempLast.StoreUnsafe(ref bufByte, firstOffset);
774+
tempFirst.StoreUnsafe(ref bufByte, lastOffset);
775+
}
776+
bufByte = ref Unsafe.Add(ref bufByte, numIters * numElements);
777+
length -= numIters * (nuint)Vector256<short>.Count * 2;
778+
// Store any remaining values one-by-one
779+
buf = ref Unsafe.As<byte, char>(ref bufByte);
783780
}
784-
else if (Vector128.IsHardwareAccelerated && remainder >= Vector128<ushort>.Count)
781+
else if (Vector128.IsHardwareAccelerated && (nuint)Vector128<short>.Count * 2 <= length)
785782
{
786-
nint lastOffset = remainder - Vector128<ushort>.Count;
787-
do
783+
ref short bufShort = ref Unsafe.As<char, short>(ref buf);
784+
nuint numElements = (nuint)Vector128<short>.Count;
785+
nuint numIters = (length / numElements) / 2;
786+
for (nuint i = 0; i < numIters; i++)
788787
{
789-
ref ushort first = ref Unsafe.As<char, ushort>(ref Unsafe.Add(ref buf, offset));
790-
ref ushort last = ref Unsafe.As<char, ushort>(ref Unsafe.Add(ref buf, lastOffset));
788+
nuint firstOffset = i * numElements;
789+
nuint lastOffset = length - ((1 + i) * numElements);
791790

792-
Vector128<ushort> tempFirst = Vector128.LoadUnsafe(ref first);
793-
Vector128<ushort> tempLast = Vector128.LoadUnsafe(ref last);
791+
// Load in values from beginning and end of the array.
792+
Vector128<short> tempFirst = Vector128.LoadUnsafe(ref bufShort, firstOffset);
793+
Vector128<short> tempLast = Vector128.LoadUnsafe(ref bufShort, lastOffset);
794794

795795
// Shuffle to reverse each vector:
796796
// +-------------------------------+
@@ -800,25 +800,19 @@ public static void Reverse(ref char buf, nuint length)
800800
// +-------------------------------+
801801
// | H | G | F | E | D | C | B | A |
802802
// +-------------------------------+
803-
tempFirst = Vector128.Shuffle(tempFirst, Vector128.Create((ushort)7, 6, 5, 4, 3, 2, 1, 0));
804-
tempLast = Vector128.Shuffle(tempLast, Vector128.Create((ushort)7, 6, 5, 4, 3, 2, 1, 0));
803+
tempFirst = Vector128.Shuffle(tempFirst, Vector128.Create(7, 6, 5, 4, 3, 2, 1, 0));
804+
tempLast = Vector128.Shuffle(tempLast, Vector128.Create(7, 6, 5, 4, 3, 2, 1, 0));
805805

806806
// Store the reversed vectors
807-
tempLast.StoreUnsafe(ref first);
808-
tempFirst.StoreUnsafe(ref last);
809-
810-
offset += Vector128<ushort>.Count;
811-
lastOffset -= Vector128<ushort>.Count;
812-
} while (lastOffset >= offset);
813-
814-
remainder = (lastOffset + Vector128<ushort>.Count - offset);
815-
}
816-
817-
// Store any remaining values one-by-one
818-
if (remainder > 1)
819-
{
820-
ReverseInner(ref Unsafe.Add(ref buf, offset), (nuint)remainder);
807+
tempLast.StoreUnsafe(ref bufShort, firstOffset);
808+
tempFirst.StoreUnsafe(ref bufShort, lastOffset);
809+
}
810+
bufShort = ref Unsafe.Add(ref bufShort, numIters * numElements);
811+
length -= numIters * (nuint)Vector128<short>.Count * 2;
812+
// Store any remaining values one-by-one
813+
buf = ref Unsafe.As<short, char>(ref bufShort);
821814
}
815+
ReverseInner(ref buf, length);
822816
}
823817
}
824818
}

0 commit comments

Comments
 (0)