@@ -1352,6 +1352,8 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
13521352 Vector128 < byte > fourthByte = Vector128 . Create ( ( byte ) ( 0b11110000u - 0x80 ) ) ;
13531353 Vector128 < byte > v0f = Vector128 . Create ( ( byte ) 0x0F ) ;
13541354 Vector128 < byte > v80 = Vector128 . Create ( ( byte ) 0x80 ) ;
1355+ Vector128 < byte > fourthByteMinusOne = Vector128 . Create ( ( byte ) ( 0b11110000u - 1 ) ) ;
1356+ Vector128 < sbyte > largestcont = Vector128 . Create ( ( sbyte ) - 65 ) ; // -65 => 0b10111111
13551357 // Performance note: we could process 64 bytes at a time for better speed in some cases.
13561358 int start_point = processedLength ;
13571359
@@ -1362,13 +1364,13 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
13621364 {
13631365
13641366 Vector128 < byte > currentBlock = AdvSimd . LoadVector128 ( pInputBuffer + processedLength ) ;
1365- if ( AdvSimd . Arm64 . MaxAcross ( Vector128 . AsUInt32 ( AdvSimd . And ( currentBlock , v80 ) ) ) . ToScalar ( ) == 0 )
1367+ if ( ( currentBlock & v80 ) == Vector128 < byte > . Zero )
13661368 // We could also use (AdvSimd.Arm64.MaxAcross(currentBlock).ToScalar() <= 127) but it is slower on some
13671369 // hardware.
13681370 {
13691371 // We have an ASCII block, no need to process it, but
13701372 // we need to check if the previous block was incomplete.
1371- if ( AdvSimd . Arm64 . MaxAcross ( prevIncomplete ) . ToScalar ( ) != 0 )
1373+ if ( prevIncomplete != Vector128 < byte > . Zero )
13721374 {
13731375 int off = processedLength >= 3 ? processedLength - 3 : processedLength ;
13741376 byte * invalidBytePointer = SimdUnicode . UTF8 . SimpleRewindAndValidateWithErrors ( 16 - 3 , pInputBuffer + processedLength - 3 , inputLength - processedLength + 3 ) ;
@@ -1402,7 +1404,7 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
14021404 Vector128 < byte > block4 = AdvSimd . LoadVector128 ( pInputBuffer + processedLength + localasciirun + 48 ) ;
14031405 Vector128 < byte > or = AdvSimd . Or ( AdvSimd . Or ( block1 , block2 ) , AdvSimd . Or ( block3 , block4 ) ) ;
14041406
1405- if ( AdvSimd . Arm64 . MaxAcross ( Vector128 . AsUInt32 ( AdvSimd . And ( or , v80 ) ) ) . ToScalar ( ) != 0 )
1407+ if ( ( or & v80 ) != Vector128 < byte > . Zero )
14061408 {
14071409 break ;
14081410 }
@@ -1433,7 +1435,7 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
14331435 // AdvSimd.Arm64.MaxAcross(error) works, but it might be slower
14341436 // than AdvSimd.Arm64.MaxAcross(Vector128.AsUInt32(error)) on some
14351437 // hardware:
1436- if ( AdvSimd . Arm64 . MaxAcross ( Vector128 . AsUInt32 ( error ) ) . ToScalar ( ) != 0 )
1438+ if ( error != Vector128 < byte > . Zero )
14371439 {
14381440 byte * invalidBytePointer ;
14391441 if ( processedLength == 0 )
@@ -1457,18 +1459,17 @@ private unsafe static (int utfadjust, int scalaradjust) calculateErrorPathadjust
14571459 return invalidBytePointer ;
14581460 }
14591461 prevIncomplete = AdvSimd . SubtractSaturate ( currentBlock , maxValue ) ;
1460- Vector128 < sbyte > largestcont = Vector128 . Create ( ( sbyte ) - 65 ) ; // -65 => 0b10111111
14611462 contbytes += - AdvSimd . Arm64 . AddAcross ( AdvSimd . CompareLessThanOrEqual ( Vector128 . AsSByte ( currentBlock ) , largestcont ) ) . ToScalar ( ) ;
1462-
1463- // computing n4 is more expensive than we would like:
1464- Vector128 < byte > fourthByteMinusOne = Vector128 . Create ( ( byte ) ( 0b11110000u - 1 ) ) ;
14651463 Vector128 < byte > largerthan0f = AdvSimd . CompareGreaterThan ( currentBlock , fourthByteMinusOne ) ;
1466- byte n4add = ( byte ) AdvSimd . Arm64 . AddAcross ( largerthan0f ) . ToScalar ( ) ;
1467- int negn4add = ( int ) ( byte ) - n4add ;
1468- n4 += negn4add ;
1464+ if ( largerthan0f != Vector128 < byte > . Zero )
1465+ {
1466+ byte n4add = ( byte ) AdvSimd . Arm64 . AddAcross ( largerthan0f ) . ToScalar ( ) ;
1467+ int negn4add = ( int ) ( byte ) - n4add ;
1468+ n4 += negn4add ;
1469+ }
14691470 }
14701471 }
1471- bool hasIncompete = AdvSimd . Arm64 . MaxAcross ( Vector128 . AsUInt32 ( prevIncomplete ) ) . ToScalar ( ) != 0 ;
1472+ bool hasIncompete = ( prevIncomplete != Vector128 < byte > . Zero ) ;
14721473 if ( processedLength < inputLength || hasIncompete )
14731474 {
14741475 byte * invalidBytePointer ;
0 commit comments