@@ -42,6 +42,28 @@ private static bool AllCharsInUInt64AreAscii(ulong value)
4242 return ( value & ~ 0x007F007F_007F007Ful ) == 0 ;
4343 }
4444
/// <summary>
/// Arm64 (AdvSimd) helper that reduces a 128-bit vector of bytes to a single lane index.
/// Each byte whose most significant bit is set contributes the corresponding byte of
/// <paramref name="bitmask"/>; adjacent bytes are then summed pairwise and the number of
/// trailing zero bits of the low 64 bits of that sum, divided by four, is returned.
/// </summary>
/// <param name="value">The 16 bytes to inspect.</param>
/// <param name="bitmask">
/// Per-byte selector ANDed with the sign-extended input. The caller in this file passes
/// <c>Vector128.Create((ushort)0x1001).AsByte()</c>, i.e. alternating 0x01 / 0x10 bytes, so
/// that every input byte maps to its own 4-bit slot after the pairwise add.
/// </param>
/// <returns>
/// With the alternating 0x01 / 0x10 bitmask: the index (0..15) of the first byte whose high
/// bit is set, or 16 when no byte has its high bit set.
/// </returns>
/// <exception cref="PlatformNotSupportedException">
/// AdvSimd.Arm64 is not supported, or the platform is not little-endian.
/// </exception>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static int GetIndexOfFirstNonAsciiByteInLane(Vector128<byte> value, Vector128<byte> bitmask)
{
    if (!(AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian))
    {
        throw new PlatformNotSupportedException();
    }

    // An arithmetic right shift by 7 replicates each byte's sign bit across the byte:
    // 0xFF where the high bit was set, 0x00 otherwise.
    Vector128<sbyte> signed = value.AsSByte();
    Vector128<byte> highBitSpread = AdvSimd.ShiftRightArithmetic(signed, 7).AsByte();

    // Keep only the selector bit(s) that bitmask assigns to each flagged byte.
    Vector128<byte> selected = AdvSimd.And(highBitSpread, bitmask);

    // Sum neighbouring bytes so that the flags for all 16 input bytes land in the
    // lower 8 bytes of the result, then read those 8 bytes as one scalar.
    Vector128<byte> folded = AdvSimd.Arm64.AddPairwise(selected, selected);
    ulong packedFlags = folded.AsUInt64().ToScalar();

    // Each input byte owns four bits of packedFlags, hence the shift by 2.
    // A zero scalar yields TrailingZeroCount == 64, i.e. a result of 16.
    int laneIndex = BitOperations.TrailingZeroCount(packedFlags) >> 2;
    Debug.Assert((packedFlags != 0) == (laneIndex < 16));
    return laneIndex;
}
66+
4567 /// <summary>
4668 /// Given a DWORD which represents two packed chars in machine-endian order,
4769 /// <see langword="true"/> iff the first char (in machine-endian order) is ASCII.
@@ -67,8 +89,8 @@ public static unsafe nuint GetIndexOfFirstNonAsciiByte(byte* pBuffer, nuint buff
6789 // pmovmskb which we know are optimized, and (b) we can avoid downclocking the processor while
6890 // this method is running.
6991
70- return ( Sse2 . IsSupported )
71- ? GetIndexOfFirstNonAsciiByte_Sse2 ( pBuffer , bufferLength )
92+ return ( Sse2 . IsSupported || AdvSimd . Arm64 . IsSupported && BitConverter . IsLittleEndian )
93+ ? GetIndexOfFirstNonAsciiByte_Sse2OrArm64 ( pBuffer , bufferLength )
7294 : GetIndexOfFirstNonAsciiByte_Default ( pBuffer , bufferLength ) ;
7395 }
7496
@@ -215,15 +237,29 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Default(byte* pBuffer, n
215237 goto Finish ;
216238 }
217239
218- private static unsafe nuint GetIndexOfFirstNonAsciiByte_Sse2 ( byte * pBuffer , nuint bufferLength )
/// <summary>
/// Tells whether a per-vector result produced by the SSE2 or the Arm64 code path reports
/// at least one non-ASCII byte.
/// </summary>
/// <param name="currentMask">
/// On SSE2: a <c>MoveMask</c> bit mask (or the bitwise OR of two such masks); any set bit
/// means a non-ASCII byte was seen.
/// On Arm64: a value returned by <c>GetIndexOfFirstNonAsciiByteInLane</c>, where 0..15 is
/// the index of the first non-ASCII byte and 16 means "all ASCII" (or the bitwise OR of two
/// such values).
/// </param>
/// <returns><see langword="true"/> if non-ASCII data was reported; otherwise <see langword="false"/>.</returns>
private static bool ContainsNonAsciiByte(uint currentMask)
{
    if (Sse2.IsSupported)
    {
        return currentMask != 0;
    }
    else
    {
        // Each Arm64 index lies in [0, 16]. Callers also pass the OR of two indices:
        //   16 | 16 == 16          -> both vectors are ASCII
        //   16 | k  == 16 + k > 16 -> only the second vector has a hit (0 < k < 16)
        //    j | k  <  16          -> both vectors have a hit
        // A "< 16" test would wrongly report the middle case as all-ASCII, so compare
        // against the exact "no hit" sentinel instead.
        return currentMask != 16;
    }
}
251+
252+ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Sse2OrArm64 ( byte * pBuffer , nuint bufferLength )
219253 {
220254 // JIT turns the below into constants
221255
222256 uint SizeOfVector128 = ( uint ) Unsafe . SizeOf < Vector128 < byte > > ( ) ;
223257 nuint MaskOfAllBitsInVector128 = ( nuint ) ( SizeOfVector128 - 1 ) ;
224258
225- Debug . Assert ( Sse2 . IsSupported , "Should've been checked by caller." ) ;
226- Debug . Assert ( BitConverter . IsLittleEndian , "SSE2 assumes little-endian." ) ;
259+ Debug . Assert ( Sse2 . IsSupported || AdvSimd . Arm64 . IsSupported , "Sse2 or AdvSimd64 required." ) ;
260+ Debug . Assert ( BitConverter . IsLittleEndian , "This SSE2/Arm64 implementation assumes little-endian." ) ;
261+
262+ Vector128 < byte > bitmask = Vector128 . Create ( ( ushort ) 0x1001 ) . AsByte ( ) ;
227263
228264 uint currentMask , secondMask ;
229265 byte * pOriginalBuffer = pBuffer ;
@@ -240,9 +276,20 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Sse2(byte* pBuffer, nuin
240276
241277 // Read the first vector unaligned.
242278
243- currentMask = ( uint ) Sse2 . MoveMask ( Sse2 . LoadVector128 ( pBuffer ) ) ; // unaligned load
279+ if ( Sse2 . IsSupported )
280+ {
281+ currentMask = ( uint ) Sse2 . MoveMask ( Sse2 . LoadVector128 ( pBuffer ) ) ; // unaligned load
282+ }
283+ else if ( AdvSimd . Arm64 . IsSupported )
284+ {
285+ currentMask = ( uint ) GetIndexOfFirstNonAsciiByteInLane ( AdvSimd . LoadVector128 ( pBuffer ) , bitmask ) ; // unaligned load
286+ }
287+ else
288+ {
289+ throw new PlatformNotSupportedException ( ) ;
290+ }
244291
245- if ( currentMask != 0 )
292+ if ( ContainsNonAsciiByte ( currentMask ) )
246293 {
247294 goto FoundNonAsciiDataInCurrentMask ;
248295 }
@@ -281,13 +328,28 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Sse2(byte* pBuffer, nuin
281328
282329 do
283330 {
284- Vector128 < byte > firstVector = Sse2 . LoadAlignedVector128 ( pBuffer ) ;
285- Vector128 < byte > secondVector = Sse2 . LoadAlignedVector128 ( pBuffer + SizeOfVector128 ) ;
331+ if ( Sse2 . IsSupported )
332+ {
333+ Vector128 < byte > firstVector = Sse2 . LoadAlignedVector128 ( pBuffer ) ;
334+ Vector128 < byte > secondVector = Sse2 . LoadAlignedVector128 ( pBuffer + SizeOfVector128 ) ;
286335
287- currentMask = ( uint ) Sse2 . MoveMask ( firstVector ) ;
288- secondMask = ( uint ) Sse2 . MoveMask ( secondVector ) ;
336+ currentMask = ( uint ) Sse2 . MoveMask ( firstVector ) ;
337+ secondMask = ( uint ) Sse2 . MoveMask ( secondVector ) ;
338+ }
339+ else if ( AdvSimd . Arm64 . IsSupported )
340+ {
341+ Vector128 < byte > firstVector = AdvSimd . LoadVector128 ( pBuffer ) ;
342+ Vector128 < byte > secondVector = AdvSimd . LoadVector128 ( pBuffer + SizeOfVector128 ) ;
289343
290- if ( ( currentMask | secondMask ) != 0 )
344+ currentMask = ( uint ) GetIndexOfFirstNonAsciiByteInLane ( firstVector , bitmask ) ;
345+ secondMask = ( uint ) GetIndexOfFirstNonAsciiByteInLane ( secondVector , bitmask ) ;
346+ }
347+ else
348+ {
349+ throw new PlatformNotSupportedException ( ) ;
350+ }
351+
352+ if ( ContainsNonAsciiByte ( currentMask | secondMask ) )
291353 {
292354 goto FoundNonAsciiDataInInnerLoop ;
293355 }
@@ -313,8 +375,20 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Sse2(byte* pBuffer, nuin
313375 // At least one full vector's worth of data remains, so we can safely read it.
314376 // Remember, at this point pBuffer is still aligned.
315377
316- currentMask = ( uint ) Sse2 . MoveMask ( Sse2 . LoadAlignedVector128 ( pBuffer ) ) ;
317- if ( currentMask != 0 )
378+ if ( Sse2 . IsSupported )
379+ {
380+ currentMask = ( uint ) Sse2 . MoveMask ( Sse2 . LoadAlignedVector128 ( pBuffer ) ) ;
381+ }
382+ else if ( AdvSimd . Arm64 . IsSupported )
383+ {
384+ currentMask = ( uint ) GetIndexOfFirstNonAsciiByteInLane ( AdvSimd . LoadVector128 ( pBuffer ) , bitmask ) ;
385+ }
386+ else
387+ {
388+ throw new PlatformNotSupportedException ( ) ;
389+ }
390+
391+ if ( ContainsNonAsciiByte ( currentMask ) )
318392 {
319393 goto FoundNonAsciiDataInCurrentMask ;
320394 }
@@ -332,8 +406,20 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Sse2(byte* pBuffer, nuin
332406
333407 pBuffer += ( bufferLength & MaskOfAllBitsInVector128 ) - SizeOfVector128 ;
334408
335- currentMask = ( uint ) Sse2 . MoveMask ( Sse2 . LoadVector128 ( pBuffer ) ) ; // unaligned load
336- if ( currentMask != 0 )
409+ if ( Sse2 . IsSupported )
410+ {
411+ currentMask = ( uint ) Sse2 . MoveMask ( Sse2 . LoadVector128 ( pBuffer ) ) ; // unaligned load
412+ }
413+ else if ( AdvSimd . Arm64 . IsSupported )
414+ {
415+ currentMask = ( uint ) GetIndexOfFirstNonAsciiByteInLane ( AdvSimd . LoadVector128 ( pBuffer ) , bitmask ) ; // unaligned load
416+ }
417+ else
418+ {
419+ throw new PlatformNotSupportedException ( ) ;
420+ }
421+
422+ if ( ContainsNonAsciiByte ( currentMask ) )
337423 {
338424 goto FoundNonAsciiDataInCurrentMask ;
339425 }
@@ -342,7 +428,6 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Sse2(byte* pBuffer, nuin
342428 }
343429
344430 Finish :
345-
346431 return ( nuint ) pBuffer - ( nuint ) pOriginalBuffer ; // and we're done!
347432
348433 FoundNonAsciiDataInInnerLoop :
@@ -351,7 +436,7 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Sse2(byte* pBuffer, nuin
351436 // instead be the second mask. If so, skip the entire first mask and drain ASCII bytes
352437 // from the second mask.
353438
354- if ( currentMask == 0 )
439+ if ( ! ContainsNonAsciiByte ( currentMask ) )
355440 {
356441 pBuffer += SizeOfVector128 ;
357442 currentMask = secondMask ;
@@ -364,7 +449,18 @@ private static unsafe nuint GetIndexOfFirstNonAsciiByte_Sse2(byte* pBuffer, nuin
364449 // available, we'll fall back to a normal loop.
365450
366451 Debug . Assert ( currentMask != 0 , "Shouldn't be here unless we see non-ASCII data." ) ;
367- pBuffer += ( uint ) BitOperations . TrailingZeroCount ( currentMask ) ;
452+ if ( Sse2 . IsSupported )
453+ {
454+ pBuffer += ( uint ) BitOperations . TrailingZeroCount ( currentMask ) ;
455+ }
456+ else if ( AdvSimd . Arm64 . IsSupported )
457+ {
458+ pBuffer += currentMask ;
459+ }
460+ else
461+ {
462+ throw new PlatformNotSupportedException ( ) ;
463+ }
368464
369465 goto Finish ;
370466
0 commit comments