Skip to content

Commit 5d33b49

Browse files
author
Prashanth Govindarajan
committed
Arm64 intrinsics for GetIndexOfFirstNonAsciiByte
1 parent b262f0b commit 5d33b49

File tree

1 file changed

+115
-19
lines changed

1 file changed

+115
-19
lines changed

src/libraries/System.Private.CoreLib/src/System/Text/ASCIIUtility.cs

Lines changed: 115 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,28 @@ private static bool AllCharsInUInt64AreAscii(ulong value)
4242
return (value & ~0x007F007F_007F007Ful) == 0;
4343
}
4444

45+
/// <summary>
/// Locates the first byte in <paramref name="value"/> whose most significant bit is set.
/// </summary>
/// <param name="value">The 16 bytes to inspect.</param>
/// <param name="bitmask">
/// Per-byte selector that is ANDed with the sign-broadcast form of <paramref name="value"/>.
/// The index arithmetic below expects 0x01 in even bytes and 0x10 in odd bytes, which is what
/// <c>Vector128.Create((ushort)0x1001).AsByte()</c> yields on a little-endian machine.
/// </param>
/// <returns>
/// With the bitmask described above: the zero-based index of the first byte that has its high bit
/// set, or 16 when no byte does.
/// </returns>
/// <exception cref="PlatformNotSupportedException">
/// AdvSimd.Arm64 is not supported, or the machine is not little-endian.
/// </exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int GetIndexOfFirstNonAsciiByteInLane(Vector128<byte> value, Vector128<byte> bitmask)
{
    if (!(AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian))
    {
        throw new PlatformNotSupportedException();
    }

    // An arithmetic shift by 7 smears each byte's sign bit across the byte: 0xFF when the
    // high bit was set, 0x00 otherwise.
    Vector128<byte> signBroadcast = AdvSimd.ShiftRightArithmetic(value.AsSByte(), 7).AsByte();

    // nibbleFlags[i] = signBroadcast[i] & (1 << (4 * (i % 2)))
    Vector128<byte> nibbleFlags = AdvSimd.And(signBroadcast, bitmask);

    // Adding neighbouring bytes folds the 16 flags into the lower 8 bytes, leaving one
    // nibble per input byte.
    Vector128<byte> folded = AdvSimd.Arm64.AddPairwise(nibbleFlags, nibbleFlags);
    ulong packed = folded.AsUInt64().ToScalar();

    // Four bits per input byte, so the bit position divided by 4 is the byte index.
    // TrailingZeroCount(0) is 64, which maps to 16 ("nothing found").
    int index = BitOperations.TrailingZeroCount(packed) >> 2;
    Debug.Assert((packed != 0) == (index < 16));
    return index;
}
66+
4567
/// <summary>
4668
/// Given a DWORD which represents two packed chars in machine-endian order,
4769
/// <see langword="true"/> iff the first char (in machine-endian order) is ASCII.
@@ -67,8 +89,8 @@ public static unsafe nuint GetIndexOfFirstNonAsciiByte(byte* pBuffer, nuint buff
6789
// pmovmskb which we know are optimized, and (b) we can avoid downclocking the processor while
6890
// this method is running.
6991

70-
return (Sse2.IsSupported)
71-
? GetIndexOfFirstNonAsciiByte_Sse2(pBuffer, bufferLength)
92+
return (Sse2.IsSupported || AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian)
93+
? GetIndexOfFirstNonAsciiByte_Sse2OrArm64(pBuffer, bufferLength)
7294
: GetIndexOfFirstNonAsciiByte_Default(pBuffer, bufferLength);
7395
}
7496

@@ -215,15 +237,29 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Default(byte* pBuffer, n
215237
goto Finish;
216238
}
217239

218-
private static unsafe nuint GetIndexOfFirstNonAsciiByte_Sse2(byte* pBuffer, nuint bufferLength)
240+
/// <summary>
/// Interprets the per-vector result computed by the SSE2 or Arm64 code path and reports
/// whether it signals non-ASCII data.
/// </summary>
/// <param name="currentMask">
/// When SSE2 is supported, any nonzero value counts as "non-ASCII present". Otherwise the
/// value is compared against 16, and anything below 16 counts as "non-ASCII present".
/// </param>
/// <returns><see langword="true"/> if the value signals non-ASCII data.</returns>
/// <remarks>
/// NOTE(review): the non-SSE2 branch only makes sense for a single index in [0, 16]. The inner
/// loop of GetIndexOfFirstNonAsciiByte_Sse2OrArm64 appears to pass <c>currentMask | secondMask</c>;
/// OR-ing two indices can produce a value of 16 or more even when one of them is below 16
/// (for example 16 | 3 == 19), which would hide a hit. Confirm against the caller.
/// </remarks>
private static bool ContainsNonAsciiByte(uint currentMask)
{
    return Sse2.IsSupported
        ? currentMask != 0
        : currentMask < 16;
}
251+
252+
private static unsafe nuint GetIndexOfFirstNonAsciiByte_Sse2OrArm64(byte* pBuffer, nuint bufferLength)
219253
{
220254
// JIT turns the below into constants
221255

222256
uint SizeOfVector128 = (uint)Unsafe.SizeOf<Vector128<byte>>();
223257
nuint MaskOfAllBitsInVector128 = (nuint)(SizeOfVector128 - 1);
224258

225-
Debug.Assert(Sse2.IsSupported, "Should've been checked by caller.");
226-
Debug.Assert(BitConverter.IsLittleEndian, "SSE2 assumes little-endian.");
259+
Debug.Assert(Sse2.IsSupported || AdvSimd.Arm64.IsSupported, "Sse2 or AdvSimd64 required.");
260+
Debug.Assert(BitConverter.IsLittleEndian, "This SSE2/Arm64 implementation assumes little-endian.");
261+
262+
Vector128<byte> bitmask = Vector128.Create((ushort)0x1001).AsByte();
227263

228264
uint currentMask, secondMask;
229265
byte* pOriginalBuffer = pBuffer;
@@ -240,9 +276,20 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Sse2(byte* pBuffer, nuin
240276

241277
// Read the first vector unaligned.
242278

243-
currentMask = (uint)Sse2.MoveMask(Sse2.LoadVector128(pBuffer)); // unaligned load
279+
if (Sse2.IsSupported)
280+
{
281+
currentMask = (uint)Sse2.MoveMask(Sse2.LoadVector128(pBuffer)); // unaligned load
282+
}
283+
else if (AdvSimd.Arm64.IsSupported)
284+
{
285+
currentMask = (uint)GetIndexOfFirstNonAsciiByteInLane(AdvSimd.LoadVector128(pBuffer), bitmask); // unaligned load
286+
}
287+
else
288+
{
289+
throw new PlatformNotSupportedException();
290+
}
244291

245-
if (currentMask != 0)
292+
if (ContainsNonAsciiByte(currentMask))
246293
{
247294
goto FoundNonAsciiDataInCurrentMask;
248295
}
@@ -281,13 +328,28 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Sse2(byte* pBuffer, nuin
281328

282329
do
283330
{
284-
Vector128<byte> firstVector = Sse2.LoadAlignedVector128(pBuffer);
285-
Vector128<byte> secondVector = Sse2.LoadAlignedVector128(pBuffer + SizeOfVector128);
331+
if (Sse2.IsSupported)
332+
{
333+
Vector128<byte> firstVector = Sse2.LoadAlignedVector128(pBuffer);
334+
Vector128<byte> secondVector = Sse2.LoadAlignedVector128(pBuffer + SizeOfVector128);
286335

287-
currentMask = (uint)Sse2.MoveMask(firstVector);
288-
secondMask = (uint)Sse2.MoveMask(secondVector);
336+
currentMask = (uint)Sse2.MoveMask(firstVector);
337+
secondMask = (uint)Sse2.MoveMask(secondVector);
338+
}
339+
else if (AdvSimd.Arm64.IsSupported)
340+
{
341+
Vector128<byte> firstVector = AdvSimd.LoadVector128(pBuffer);
342+
Vector128<byte> secondVector = AdvSimd.LoadVector128(pBuffer + SizeOfVector128);
289343

290-
if ((currentMask | secondMask) != 0)
344+
currentMask = (uint)GetIndexOfFirstNonAsciiByteInLane(firstVector, bitmask);
345+
secondMask = (uint)GetIndexOfFirstNonAsciiByteInLane(secondVector, bitmask);
346+
}
347+
else
348+
{
349+
throw new PlatformNotSupportedException();
350+
}
351+
352+
if (ContainsNonAsciiByte(currentMask | secondMask))
291353
{
292354
goto FoundNonAsciiDataInInnerLoop;
293355
}
@@ -313,8 +375,20 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Sse2(byte* pBuffer, nuin
313375
// At least one full vector's worth of data remains, so we can safely read it.
314376
// Remember, at this point pBuffer is still aligned.
315377

316-
currentMask = (uint)Sse2.MoveMask(Sse2.LoadAlignedVector128(pBuffer));
317-
if (currentMask != 0)
378+
if (Sse2.IsSupported)
379+
{
380+
currentMask = (uint)Sse2.MoveMask(Sse2.LoadAlignedVector128(pBuffer));
381+
}
382+
else if (AdvSimd.Arm64.IsSupported)
383+
{
384+
currentMask = (uint)GetIndexOfFirstNonAsciiByteInLane(AdvSimd.LoadVector128(pBuffer), bitmask);
385+
}
386+
else
387+
{
388+
throw new PlatformNotSupportedException();
389+
}
390+
391+
if (ContainsNonAsciiByte(currentMask))
318392
{
319393
goto FoundNonAsciiDataInCurrentMask;
320394
}
@@ -332,8 +406,20 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Sse2(byte* pBuffer, nuin
332406

333407
pBuffer += (bufferLength & MaskOfAllBitsInVector128) - SizeOfVector128;
334408

335-
currentMask = (uint)Sse2.MoveMask(Sse2.LoadVector128(pBuffer)); // unaligned load
336-
if (currentMask != 0)
409+
if (Sse2.IsSupported)
410+
{
411+
currentMask = (uint)Sse2.MoveMask(Sse2.LoadVector128(pBuffer)); // unaligned load
412+
}
413+
else if (AdvSimd.Arm64.IsSupported)
414+
{
415+
currentMask = (uint)GetIndexOfFirstNonAsciiByteInLane(AdvSimd.LoadVector128(pBuffer), bitmask); // unaligned load
416+
}
417+
else
418+
{
419+
throw new PlatformNotSupportedException();
420+
}
421+
422+
if (ContainsNonAsciiByte(currentMask))
337423
{
338424
goto FoundNonAsciiDataInCurrentMask;
339425
}
@@ -342,7 +428,6 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Sse2(byte* pBuffer, nuin
342428
}
343429

344430
Finish:
345-
346431
return (nuint)pBuffer - (nuint)pOriginalBuffer; // and we're done!
347432

348433
FoundNonAsciiDataInInnerLoop:
@@ -351,7 +436,7 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Sse2(byte* pBuffer, nuin
351436
// instead be the second mask. If so, skip the entire first mask and drain ASCII bytes
352437
// from the second mask.
353438

354-
if (currentMask == 0)
439+
if (!ContainsNonAsciiByte(currentMask))
355440
{
356441
pBuffer += SizeOfVector128;
357442
currentMask = secondMask;
@@ -364,7 +449,18 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Sse2(byte* pBuffer, nuin
364449
// available, we'll fall back to a normal loop.
365450

366451
Debug.Assert(currentMask != 0, "Shouldn't be here unless we see non-ASCII data.");
367-
pBuffer += (uint)BitOperations.TrailingZeroCount(currentMask);
452+
if (Sse2.IsSupported)
453+
{
454+
pBuffer += (uint)BitOperations.TrailingZeroCount(currentMask);
455+
}
456+
else if (AdvSimd.Arm64.IsSupported)
457+
{
458+
pBuffer += currentMask;
459+
}
460+
else
461+
{
462+
throw new PlatformNotSupportedException();
463+
}
368464

369465
goto Finish;
370466

0 commit comments

Comments
 (0)