From d946c3b3ee24ab2cdc6704a115ab2c570c2c3845 Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Tue, 1 Nov 2022 12:00:23 +0200 Subject: [PATCH 01/12] Revert "[Mono] Restore old code to solve the recent SpanHelpers regressions (#75917)" This reverts commit 254844a700179bb3b39149db1946691e3696d6fc. --- .../System.Private.CoreLib.Shared.projitems | 3 - .../src/System/MemoryExtensions.cs | 2 - .../src/System/SpanHelpers.Mono.cs | 2697 ----------------- .../src/System/SpanHelpers.T.cs | 16 - 4 files changed, 2718 deletions(-) delete mode 100644 src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Mono.cs diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index f1070474198a3e..4c645de0cc7ec6 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -2506,7 +2506,4 @@ - - - \ No newline at end of file diff --git a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs index 9bb51d939f255e..f2fcd1ae1a82c7 100644 --- a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs +++ b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs @@ -1927,7 +1927,6 @@ public static int LastIndexOfAny(this ReadOnlySpan span, ReadOnlySpan v Unsafe.Add(ref valueRef, 2), span.Length); -#if !MONO // We don't have a mono overload for 4 values case 4: return SpanHelpers.LastIndexOfAnyValueType( ref spanRef, @@ -1936,7 +1935,6 @@ public static int LastIndexOfAny(this ReadOnlySpan span, ReadOnlySpan v Unsafe.Add(ref valueRef, 2), Unsafe.Add(ref valueRef, 3), span.Length); -#endif default: return ProbabilisticMap.LastIndexOfAny(ref Unsafe.As(ref spanRef), span.Length, ref Unsafe.As(ref valueRef), values.Length); diff --git 
a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Mono.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Mono.cs deleted file mode 100644 index d6a7f09e7465b1..00000000000000 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Mono.cs +++ /dev/null @@ -1,2697 +0,0 @@ -// Licensed to the .NET Foundation under one or more agreements. -// The .NET Foundation licenses this file to you under the MIT license. - -using System.Diagnostics; -using System.Numerics; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; -using System.Runtime.Intrinsics; -using System.Runtime.Intrinsics.Arm; -using System.Runtime.Intrinsics.X86; - -namespace System -{ - internal static partial class SpanHelpers // helpers used by Mono - { - [MethodImpl(MethodImplOptions.AggressiveOptimization)] - internal static unsafe int IndexOfValueType(ref byte searchSpace, byte value, int length) - { - Debug.Assert(length >= 0); - - uint uValue = value; // Use uint for comparisons to avoid unnecessary 8->32 extensions - nuint offset = 0; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations - nuint lengthToExamine = (nuint)(uint)length; - - if (Vector128.IsHardwareAccelerated) - { - // Avx2 branch also operates on Sse2 sizes, so check is combined. 
- if (length >= Vector128.Count * 2) - { - lengthToExamine = UnalignedCountVector128(ref searchSpace); - } - } - else if (Vector.IsHardwareAccelerated) - { - if (length >= Vector.Count * 2) - { - lengthToExamine = UnalignedCountVector(ref searchSpace); - } - } - SequentialScan: - while (lengthToExamine >= 8) - { - lengthToExamine -= 8; - - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset)) - goto Found; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 1)) - goto Found1; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 2)) - goto Found2; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 3)) - goto Found3; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 4)) - goto Found4; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 5)) - goto Found5; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 6)) - goto Found6; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 7)) - goto Found7; - - offset += 8; - } - - if (lengthToExamine >= 4) - { - lengthToExamine -= 4; - - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset)) - goto Found; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 1)) - goto Found1; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 2)) - goto Found2; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 3)) - goto Found3; - - offset += 4; - } - - while (lengthToExamine > 0) - { - lengthToExamine -= 1; - - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset)) - goto Found; - - offset += 1; - } - - // We get past SequentialScan only if IsHardwareAccelerated is true; and remain length is greater than Vector length. - // However, we still have the redundant check to allow the JIT to see that the code is unreachable and eliminate it when the platform does not - // have hardware accelerated. After processing Vector lengths we return to SequentialScan to finish any remaining. 
- if (Vector256.IsHardwareAccelerated) - { - if (offset < (nuint)(uint)length) - { - if ((((nuint)(uint)Unsafe.AsPointer(ref searchSpace) + offset) & (nuint)(Vector256.Count - 1)) != 0) - { - // Not currently aligned to Vector256 (is aligned to Vector128); this can cause a problem for searches - // with no upper bound e.g. String.strlen. - // Start with a check on Vector128 to align to Vector256, before moving to processing Vector256. - // This ensures we do not fault across memory pages while searching for an end of string. - Vector128 values = Vector128.Create(value); - Vector128 search = Vector128.LoadUnsafe(ref searchSpace, offset); - - // Same method as below - uint matches = Vector128.Equals(values, search).ExtractMostSignificantBits(); - if (matches == 0) - { - // Zero flags set so no matches - offset += (nuint)Vector128.Count; - } - else - { - // Find bitflag offset of first match and add to current offset - return (int)(offset + (uint)BitOperations.TrailingZeroCount(matches)); - } - } - - lengthToExamine = GetByteVector256SpanLength(offset, length); - if (lengthToExamine > offset) - { - Vector256 values = Vector256.Create(value); - do - { - Vector256 search = Vector256.LoadUnsafe(ref searchSpace, offset); - uint matches = Vector256.Equals(values, search).ExtractMostSignificantBits(); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. 
- if (matches == 0) - { - // Zero flags set so no matches - offset += (nuint)Vector256.Count; - continue; - } - - // Find bitflag offset of first match and add to current offset - return (int)(offset + (uint)BitOperations.TrailingZeroCount(matches)); - } while (lengthToExamine > offset); - } - - lengthToExamine = GetByteVector128SpanLength(offset, length); - if (lengthToExamine > offset) - { - Vector128 values = Vector128.Create(value); - Vector128 search = Vector128.LoadUnsafe(ref searchSpace, offset); - - // Same method as above - uint matches = Vector128.Equals(values, search).ExtractMostSignificantBits(); - if (matches == 0) - { - // Zero flags set so no matches - offset += (nuint)Vector128.Count; - } - else - { - // Find bitflag offset of first match and add to current offset - return (int)(offset + (uint)BitOperations.TrailingZeroCount(matches)); - } - } - - if (offset < (nuint)(uint)length) - { - lengthToExamine = ((nuint)(uint)length - offset); - goto SequentialScan; - } - } - } - else if (Vector128.IsHardwareAccelerated) - { - if (offset < (nuint)(uint)length) - { - lengthToExamine = GetByteVector128SpanLength(offset, length); - - Vector128 values = Vector128.Create(value); - while (lengthToExamine > offset) - { - Vector128 search = Vector128.LoadUnsafe(ref searchSpace, offset); - - // Same method as above - Vector128 compareResult = Vector128.Equals(values, search); - if (compareResult == Vector128.Zero) - { - // Zero flags set so no matches - offset += (nuint)Vector128.Count; - continue; - } - - // Find bitflag offset of first match and add to current offset - uint matches = compareResult.ExtractMostSignificantBits(); - return (int)(offset + (uint)BitOperations.TrailingZeroCount(matches)); - } - - if (offset < (nuint)(uint)length) - { - lengthToExamine = ((nuint)(uint)length - offset); - goto SequentialScan; - } - } - } - else if (Vector.IsHardwareAccelerated) - { - if (offset < (nuint)(uint)length) - { - lengthToExamine = GetByteVectorSpanLength(offset, 
length); - - Vector values = new Vector(value); - - while (lengthToExamine > offset) - { - var matches = Vector.Equals(values, LoadVector(ref searchSpace, offset)); - if (Vector.Zero.Equals(matches)) - { - offset += (nuint)Vector.Count; - continue; - } - - // Find offset of first match and add to current offset - return (int)offset + LocateFirstFoundByte(matches); - } - - if (offset < (nuint)(uint)length) - { - lengthToExamine = ((nuint)(uint)length - offset); - goto SequentialScan; - } - } - } - return -1; - Found: // Workaround for https://github.com/dotnet/runtime/issues/8795 - return (int)offset; - Found1: - return (int)(offset + 1); - Found2: - return (int)(offset + 2); - Found3: - return (int)(offset + 3); - Found4: - return (int)(offset + 4); - Found5: - return (int)(offset + 5); - Found6: - return (int)(offset + 6); - Found7: - return (int)(offset + 7); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static unsafe int IndexOfValueType(ref short searchSpace, short value, int length) - => IndexOfChar(ref Unsafe.As(ref searchSpace), Unsafe.As(ref value), length); - - [MethodImpl(MethodImplOptions.AggressiveOptimization)] - internal static unsafe int IndexOfChar(ref char searchSpace, char value, int length) - { - Debug.Assert(length >= 0); - - nint offset = 0; - nint lengthToExamine = length; - - if (((int)Unsafe.AsPointer(ref searchSpace) & 1) != 0) - { - // Input isn't char aligned, we won't be able to align it to a Vector - } - else if (Sse2.IsSupported || AdvSimd.Arm64.IsSupported) - { - // Avx2 branch also operates on Sse2 sizes, so check is combined. - // Needs to be double length to allow us to align the data first. - if (length >= Vector128.Count * 2) - { - lengthToExamine = UnalignedCountVector128(ref searchSpace); - } - } - else if (Vector.IsHardwareAccelerated) - { - // Needs to be double length to allow us to align the data first. 
- if (length >= Vector.Count * 2) - { - lengthToExamine = UnalignedCountVector(ref searchSpace); - } - } - - SequentialScan: - // In the non-vector case lengthToExamine is the total length. - // In the vector case lengthToExamine first aligns to Vector, - // then in a second pass after the Vector lengths is the - // remaining data that is shorter than a Vector length. - while (lengthToExamine >= 4) - { - ref char current = ref Unsafe.Add(ref searchSpace, offset); - - if (value == current) - goto Found; - if (value == Unsafe.Add(ref current, 1)) - goto Found1; - if (value == Unsafe.Add(ref current, 2)) - goto Found2; - if (value == Unsafe.Add(ref current, 3)) - goto Found3; - - offset += 4; - lengthToExamine -= 4; - } - - while (lengthToExamine > 0) - { - if (value == Unsafe.Add(ref searchSpace, offset)) - goto Found; - - offset++; - lengthToExamine--; - } - - // We get past SequentialScan only if IsHardwareAccelerated or intrinsic .IsSupported is true. However, we still have the redundant check to allow - // the JIT to see that the code is unreachable and eliminate it when the platform does not have hardware accelerated. - if (Avx2.IsSupported) - { - if (offset < length) - { - Debug.Assert(length - offset >= Vector128.Count); - if (((nint)Unsafe.AsPointer(ref Unsafe.Add(ref searchSpace, (nint)offset)) & (nint)(Vector256.Count - 1)) != 0) - { - // Not currently aligned to Vector256 (is aligned to Vector128); this can cause a problem for searches - // with no upper bound e.g. String.wcslen. Start with a check on Vector128 to align to Vector256, - // before moving to processing Vector256. - - // If the input searchSpan has been fixed or pinned, this ensures we do not fault across memory pages - // while searching for an end of string. Specifically that this assumes that the length is either correct - // or that the data is pinned otherwise it may cause an AccessViolation from crossing a page boundary into an - // unowned page. If the search is unbounded (e.g. 
null terminator in wcslen) and the search value is not found, - // again this will likely cause an AccessViolation. However, correctly bounded searches will return -1 rather - // than ever causing an AV. - - // If the searchSpan has not been fixed or pinned the GC can relocate it during the execution of this - // method, so the alignment only acts as best endeavour. The GC cost is likely to dominate over - // the misalignment that may occur after; to we default to giving the GC a free hand to relocate and - // its up to the caller whether they are operating over fixed data. - Vector128 values = Vector128.Create((ushort)value); - Vector128 search = LoadVector128(ref searchSpace, offset); - - // Same method as below - int matches = Sse2.MoveMask(Sse2.CompareEqual(values, search).AsByte()); - if (matches == 0) - { - // Zero flags set so no matches - offset += Vector128.Count; - } - else - { - // Find bitflag offset of first match and add to current offset - return (int)(offset + ((uint)BitOperations.TrailingZeroCount(matches) / sizeof(char))); - } - } - - lengthToExamine = GetCharVector256SpanLength(offset, length); - if (lengthToExamine > 0) - { - Vector256 values = Vector256.Create((ushort)value); - do - { - Debug.Assert(lengthToExamine >= Vector256.Count); - - Vector256 search = LoadVector256(ref searchSpace, offset); - int matches = Avx2.MoveMask(Avx2.CompareEqual(values, search).AsByte()); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. 
- if (matches == 0) - { - // Zero flags set so no matches - offset += Vector256.Count; - lengthToExamine -= Vector256.Count; - continue; - } - - // Find bitflag offset of first match and add to current offset, - // flags are in bytes so divide for chars - return (int)(offset + ((uint)BitOperations.TrailingZeroCount(matches) / sizeof(char))); - } while (lengthToExamine > 0); - } - - lengthToExamine = GetCharVector128SpanLength(offset, length); - if (lengthToExamine > 0) - { - Debug.Assert(lengthToExamine >= Vector128.Count); - - Vector128 values = Vector128.Create((ushort)value); - Vector128 search = LoadVector128(ref searchSpace, offset); - - // Same method as above - int matches = Sse2.MoveMask(Sse2.CompareEqual(values, search).AsByte()); - if (matches == 0) - { - // Zero flags set so no matches - offset += Vector128.Count; - // Don't need to change lengthToExamine here as we don't use its current value again. - } - else - { - // Find bitflag offset of first match and add to current offset, - // flags are in bytes so divide for chars - return (int)(offset + ((uint)BitOperations.TrailingZeroCount(matches) / sizeof(char))); - } - } - - if (offset < length) - { - lengthToExamine = length - offset; - goto SequentialScan; - } - } - } - else if (Sse2.IsSupported) - { - if (offset < length) - { - Debug.Assert(length - offset >= Vector128.Count); - - lengthToExamine = GetCharVector128SpanLength(offset, length); - if (lengthToExamine > 0) - { - Vector128 values = Vector128.Create((ushort)value); - do - { - Debug.Assert(lengthToExamine >= Vector128.Count); - - Vector128 search = LoadVector128(ref searchSpace, offset); - - // Same method as above - int matches = Sse2.MoveMask(Sse2.CompareEqual(values, search).AsByte()); - if (matches == 0) - { - // Zero flags set so no matches - offset += Vector128.Count; - lengthToExamine -= Vector128.Count; - continue; - } - - // Find bitflag offset of first match and add to current offset, - // flags are in bytes so divide for chars - 
return (int)(offset + ((uint)BitOperations.TrailingZeroCount(matches) / sizeof(char))); - } while (lengthToExamine > 0); - } - - if (offset < length) - { - lengthToExamine = length - offset; - goto SequentialScan; - } - } - } - else if (AdvSimd.Arm64.IsSupported) - { - if (offset < length) - { - Debug.Assert(length - offset >= Vector128.Count); - - lengthToExamine = GetCharVector128SpanLength(offset, length); - if (lengthToExamine > 0) - { - Vector128 values = Vector128.Create((ushort)value); - do - { - Debug.Assert(lengthToExamine >= Vector128.Count); - - Vector128 search = LoadVector128(ref searchSpace, offset); - Vector128 compareResult = AdvSimd.CompareEqual(values, search); - - if (compareResult == Vector128.Zero) - { - offset += Vector128.Count; - lengthToExamine -= Vector128.Count; - continue; - } - - return (int)(offset + FindFirstMatchedLane(compareResult)); - } while (lengthToExamine > 0); - } - - if (offset < length) - { - lengthToExamine = length - offset; - goto SequentialScan; - } - } - } - else if (Vector.IsHardwareAccelerated) - { - if (offset < length) - { - Debug.Assert(length - offset >= Vector.Count); - - lengthToExamine = GetCharVectorSpanLength(offset, length); - - if (lengthToExamine > 0) - { - Vector values = new Vector((ushort)value); - do - { - Debug.Assert(lengthToExamine >= Vector.Count); - - var matches = Vector.Equals(values, LoadVector(ref searchSpace, offset)); - if (Vector.Zero.Equals(matches)) - { - offset += Vector.Count; - lengthToExamine -= Vector.Count; - continue; - } - - // Find offset of first match - return (int)(offset + LocateFirstFoundChar(matches)); - } while (lengthToExamine > 0); - } - - if (offset < length) - { - lengthToExamine = length - offset; - goto SequentialScan; - } - } - } - return -1; - Found3: - return (int)(offset + 3); - Found2: - return (int)(offset + 2); - Found1: - return (int)(offset + 1); - Found: - return (int)(offset); - } - - internal static unsafe int IndexOfValueType(ref T searchSpace, T value, 
int length) where T : struct, IEquatable - { - Debug.Assert(length >= 0); - - nint index = 0; // Use nint for arithmetic to avoid unnecessary 64->32->64 truncations - if (Vector.IsHardwareAccelerated && Vector.IsSupported && (Vector.Count * 2) <= length) - { - Vector valueVector = new Vector(value); - Vector compareVector; - Vector matchVector; - if ((uint)length % (uint)Vector.Count != 0) - { - // Number of elements is not a multiple of Vector.Count, so do one - // check and shift only enough for the remaining set to be a multiple - // of Vector.Count. - compareVector = Unsafe.As>(ref Unsafe.Add(ref searchSpace, index)); - matchVector = Vector.Equals(valueVector, compareVector); - if (matchVector != Vector.Zero) - { - goto VectorMatch; - } - index += length % Vector.Count; - length -= length % Vector.Count; - } - while (length > 0) - { - compareVector = Unsafe.As>(ref Unsafe.Add(ref searchSpace, index)); - matchVector = Vector.Equals(valueVector, compareVector); - if (matchVector != Vector.Zero) - { - goto VectorMatch; - } - index += Vector.Count; - length -= Vector.Count; - } - goto NotFound; - VectorMatch: - for (int i = 0; i < Vector.Count; i++) - if (compareVector[i].Equals(value)) - return (int)(index + i); - } - - while (length >= 8) - { - if (value.Equals(Unsafe.Add(ref searchSpace, index))) - goto Found; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 1))) - goto Found1; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 2))) - goto Found2; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 3))) - goto Found3; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 4))) - goto Found4; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 5))) - goto Found5; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 6))) - goto Found6; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 7))) - goto Found7; - - length -= 8; - index += 8; - } - - while (length >= 4) - { - if (value.Equals(Unsafe.Add(ref searchSpace, index))) - goto Found; - if 
(value.Equals(Unsafe.Add(ref searchSpace, index + 1))) - goto Found1; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 2))) - goto Found2; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 3))) - goto Found3; - - length -= 4; - index += 4; - } - - while (length > 0) - { - if (value.Equals(Unsafe.Add(ref searchSpace, index))) - goto Found; - - index += 1; - length--; - } - NotFound: - return -1; - - Found: // Workaround for https://github.com/dotnet/runtime/issues/8795 - return (int)index; - Found1: - return (int)(index + 1); - Found2: - return (int)(index + 2); - Found3: - return (int)(index + 3); - Found4: - return (int)(index + 4); - Found5: - return (int)(index + 5); - Found6: - return (int)(index + 6); - Found7: - return (int)(index + 7); - } - - internal static int IndexOfAnyExceptValueType(ref T searchSpace, T value0, int length) where T : struct, IEquatable - { - Debug.Assert(length >= 0, "Expected non-negative length"); - Debug.Assert(value0 is byte or short or int or long, "Expected caller to normalize to one of these types"); - - if (!Vector128.IsHardwareAccelerated || length < Vector128.Count) - { - for (int i = 0; i < length; i++) - { - if (!Unsafe.Add(ref searchSpace, i).Equals(value0)) - { - return i; - } - } - } - else - { - Vector128 notEquals, value0Vector = Vector128.Create(value0); - ref T current = ref searchSpace; - ref T oneVectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, length - Vector128.Count); - - // Loop until either we've finished all elements or there's less than a vector's-worth remaining. - do - { - notEquals = ~Vector128.Equals(value0Vector, Vector128.LoadUnsafe(ref current)); - if (notEquals != Vector128.Zero) - { - return ComputeIndex(ref searchSpace, ref current, notEquals); - } - - current = ref Unsafe.Add(ref current, Vector128.Count); - } - while (!Unsafe.IsAddressGreaterThan(ref current, ref oneVectorAwayFromEnd)); - - // If any elements remain, process the last vector in the search space. 
- if ((uint)length % Vector128.Count != 0) - { - notEquals = ~Vector128.Equals(value0Vector, Vector128.LoadUnsafe(ref oneVectorAwayFromEnd)); - if (notEquals != Vector128.Zero) - { - return ComputeIndex(ref searchSpace, ref oneVectorAwayFromEnd, notEquals); - } - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - static int ComputeIndex(ref T searchSpace, ref T current, Vector128 notEquals) - { - uint notEqualsElements = notEquals.ExtractMostSignificantBits(); - int index = BitOperations.TrailingZeroCount(notEqualsElements); - return index + (int)(Unsafe.ByteOffset(ref searchSpace, ref current) / Unsafe.SizeOf()); - } - } - - return -1; - } - - [MethodImpl(MethodImplOptions.AggressiveOptimization)] - internal static int LastIndexOfValueType(ref byte searchSpace, byte value, int length) - { - Debug.Assert(length >= 0); - - uint uValue = value; // Use uint for comparisons to avoid unnecessary 8->32 extensions - nuint offset = (nuint)(uint)length; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations - nuint lengthToExamine = (nuint)(uint)length; - - if (Vector.IsHardwareAccelerated && length >= Vector.Count * 2) - { - lengthToExamine = UnalignedCountVectorFromEnd(ref searchSpace, length); - } - SequentialScan: - while (lengthToExamine >= 8) - { - lengthToExamine -= 8; - offset -= 8; - - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 7)) - goto Found7; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 6)) - goto Found6; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 5)) - goto Found5; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 4)) - goto Found4; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 3)) - goto Found3; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 2)) - goto Found2; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 1)) - goto Found1; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset)) - goto Found; - } - - if 
(lengthToExamine >= 4) - { - lengthToExamine -= 4; - offset -= 4; - - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 3)) - goto Found3; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 2)) - goto Found2; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 1)) - goto Found1; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset)) - goto Found; - } - - while (lengthToExamine > 0) - { - lengthToExamine -= 1; - offset -= 1; - - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset)) - goto Found; - } - - if (Vector.IsHardwareAccelerated && (offset > 0)) - { - lengthToExamine = (offset & (nuint)~(Vector.Count - 1)); - - Vector values = new Vector(value); - - while (lengthToExamine > (nuint)(Vector.Count - 1)) - { - var matches = Vector.Equals(values, LoadVector(ref searchSpace, offset - (nuint)Vector.Count)); - if (Vector.Zero.Equals(matches)) - { - offset -= (nuint)Vector.Count; - lengthToExamine -= (nuint)Vector.Count; - continue; - } - - // Find offset of first match and add to current offset - return (int)(offset) - Vector.Count + LocateLastFoundByte(matches); - } - if (offset > 0) - { - lengthToExamine = offset; - goto SequentialScan; - } - } - return -1; - Found: // Workaround for https://github.com/dotnet/runtime/issues/8795 - return (int)offset; - Found1: - return (int)(offset + 1); - Found2: - return (int)(offset + 2); - Found3: - return (int)(offset + 3); - Found4: - return (int)(offset + 4); - Found5: - return (int)(offset + 5); - Found6: - return (int)(offset + 6); - Found7: - return (int)(offset + 7); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static unsafe int LastIndexOfValueType(ref short searchSpace, short value, int length) - => LastIndexOfValueType(ref Unsafe.As(ref searchSpace), Unsafe.As(ref value), length); - - [MethodImpl(MethodImplOptions.AggressiveOptimization)] - internal static unsafe int LastIndexOfValueType(ref char searchSpace, char value, int length) - { - 
Debug.Assert(length >= 0); - - fixed (char* pChars = &searchSpace) - { - char* pCh = pChars + length; - char* pEndCh = pChars; - - if (Vector.IsHardwareAccelerated && length >= Vector.Count * 2) - { - // Figure out how many characters to read sequentially from the end until we are vector aligned - // This is equivalent to: length = ((int)pCh % Unsafe.SizeOf>()) / elementsPerByte - const int elementsPerByte = sizeof(ushort) / sizeof(byte); - length = ((int)pCh & (Unsafe.SizeOf>() - 1)) / elementsPerByte; - } - - SequentialScan: - while (length >= 4) - { - length -= 4; - pCh -= 4; - - if (*(pCh + 3) == value) - goto Found3; - if (*(pCh + 2) == value) - goto Found2; - if (*(pCh + 1) == value) - goto Found1; - if (*pCh == value) - goto Found; - } - - while (length > 0) - { - length--; - pCh--; - - if (*pCh == value) - goto Found; - } - - // We get past SequentialScan only if IsHardwareAccelerated is true. However, we still have the redundant check to allow - // the JIT to see that the code is unreachable and eliminate it when the platform does not have hardware accelerated. - if (Vector.IsHardwareAccelerated && pCh > pEndCh) - { - // Get the highest multiple of Vector.Count that is within the search space. - // That will be how many times we iterate in the loop below. 
- // This is equivalent to: length = Vector.Count * ((int)(pCh - pEndCh) / Vector.Count) - length = (int)((pCh - pEndCh) & ~(Vector.Count - 1)); - - // Get comparison Vector - Vector vComparison = new Vector(value); - - while (length > 0) - { - char* pStart = pCh - Vector.Count; - // Using Unsafe.Read instead of ReadUnaligned since the search space is pinned and pCh (and hence pSart) is always vector aligned - Debug.Assert(((int)pStart & (Unsafe.SizeOf>() - 1)) == 0); - Vector vMatches = Vector.Equals(vComparison, Unsafe.Read>(pStart)); - if (Vector.Zero.Equals(vMatches)) - { - pCh -= Vector.Count; - length -= Vector.Count; - continue; - } - // Find offset of last match - return (int)(pStart - pEndCh) + LocateLastFoundChar(vMatches); - } - - if (pCh > pEndCh) - { - length = (int)(pCh - pEndCh); - goto SequentialScan; - } - } - - return -1; - Found: - return (int)(pCh - pEndCh); - Found1: - return (int)(pCh - pEndCh) + 1; - Found2: - return (int)(pCh - pEndCh) + 2; - Found3: - return (int)(pCh - pEndCh) + 3; - } - } - - [MethodImpl(MethodImplOptions.AggressiveOptimization)] - internal static unsafe int LastIndexOfValueType(ref T searchSpace, T value, int length) where T : IEquatable? - => LastIndexOf(ref searchSpace, value, length); - - [MethodImpl(MethodImplOptions.AggressiveOptimization)] - internal static int IndexOfAnyValueType(ref byte searchSpace, byte value0, byte value1, int length) - { - Debug.Assert(length >= 0); - - uint uValue0 = value0; // Use uint for comparisons to avoid unnecessary 8->32 extensions - uint uValue1 = value1; // Use uint for comparisons to avoid unnecessary 8->32 extensions - nuint offset = 0; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations - nuint lengthToExamine = (nuint)(uint)length; - - if (Sse2.IsSupported || AdvSimd.Arm64.IsSupported) - { - // Avx2 branch also operates on Sse2 sizes, so check is combined. 
- nint vectorDiff = (nint)length - Vector128.Count; - if (vectorDiff >= 0) - { - // >= Sse2 intrinsics are supported, and length is enough to use them so use that path. - // We jump forward to the intrinsics at the end of the method so a naive branch predict - // will choose the non-intrinsic path so short lengths which don't gain anything aren't - // overly disadvantaged by having to jump over a lot of code. Whereas the longer lengths - // more than make this back from the intrinsics. - lengthToExamine = (nuint)vectorDiff; - goto IntrinsicsCompare; - } - } - else if (Vector.IsHardwareAccelerated) - { - // Calculate lengthToExamine here for test, as it is used later - nint vectorDiff = (nint)length - Vector.Count; - if (vectorDiff >= 0) - { - // Similar as above for Vector version - lengthToExamine = (nuint)vectorDiff; - goto IntrinsicsCompare; - } - } - - uint lookUp; - while (lengthToExamine >= 8) - { - lengthToExamine -= 8; - - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found1; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found2; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found3; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 4); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found4; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 5); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found5; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 6); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found6; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 7); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found7; - - offset += 8; - } - - if (lengthToExamine >= 4) - { - lengthToExamine -= 
4; - - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found1; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found2; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found3; - - offset += 4; - } - - while (lengthToExamine > 0) - { - - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found; - - offset += 1; - lengthToExamine -= 1; - } - - NotFound: - return -1; - Found: // Workaround for https://github.com/dotnet/runtime/issues/8795 - return (int)offset; - Found1: - return (int)(offset + 1); - Found2: - return (int)(offset + 2); - Found3: - return (int)(offset + 3); - Found4: - return (int)(offset + 4); - Found5: - return (int)(offset + 5); - Found6: - return (int)(offset + 6); - Found7: - return (int)(offset + 7); - - IntrinsicsCompare: - // When we move into a Vectorized block, we process everything of Vector size; - // and then for any remainder we do a final compare of Vector size but starting at - // the end and forwards, which may overlap on an earlier compare. - - // We include the Supported check again here even though path will not be taken, so the asm isn't generated if not supported. 
- if (Sse2.IsSupported) - { - int matches; - if (Avx2.IsSupported) - { - Vector256 search; - // Guard as we may only have a valid size for Vector128; when we will move to the Sse2 - // We have already subtracted Vector128.Count from lengthToExamine so compare against that - // to see if we have double the size for Vector256.Count - if (lengthToExamine >= (nuint)Vector128.Count) - { - Vector256 values0 = Vector256.Create(value0); - Vector256 values1 = Vector256.Create(value1); - - // Subtract Vector128.Count so we have now subtracted Vector256.Count - lengthToExamine -= (nuint)Vector128.Count; - // First time this checks again against 0, however we will move into final compare if it fails. - while (lengthToExamine > offset) - { - search = LoadVector256(ref searchSpace, offset); - // Bitwise Or to combine the flagged matches for the second value to our match flags - matches = Avx2.MoveMask( - Avx2.Or( - Avx2.CompareEqual(values0, search), - Avx2.CompareEqual(values1, search))); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. - if (matches == 0) - { - // None matched - offset += (nuint)Vector256.Count; - continue; - } - - goto IntrinsicsMatch; - } - - // Move to Vector length from end for final compare - search = LoadVector256(ref searchSpace, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = Avx2.MoveMask( - Avx2.Or( - Avx2.CompareEqual(values0, search), - Avx2.CompareEqual(values1, search))); - if (matches == 0) - { - // None matched - goto NotFound; - } - - goto IntrinsicsMatch; - } - } - - // Initial size check was done on method entry. - Debug.Assert(length >= Vector128.Count); - { - Vector128 search; - Vector128 values0 = Vector128.Create(value0); - Vector128 values1 = Vector128.Create(value1); - // First time this checks against 0 and we will move into final compare if it fails. 
- while (lengthToExamine > offset) - { - search = LoadVector128(ref searchSpace, offset); - - matches = Sse2.MoveMask( - Sse2.Or( - Sse2.CompareEqual(values0, search), - Sse2.CompareEqual(values1, search)) - .AsByte()); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. - if (matches == 0) - { - // None matched - offset += (nuint)Vector128.Count; - continue; - } - - goto IntrinsicsMatch; - } - // Move to Vector length from end for final compare - search = LoadVector128(ref searchSpace, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = Sse2.MoveMask( - Sse2.Or( - Sse2.CompareEqual(values0, search), - Sse2.CompareEqual(values1, search))); - if (matches == 0) - { - // None matched - goto NotFound; - } - } - - IntrinsicsMatch: - // Find bitflag offset of first difference and add to current offset - offset += (nuint)BitOperations.TrailingZeroCount(matches); - goto Found; - } - else if (AdvSimd.Arm64.IsSupported) - { - Vector128 search; - Vector128 matches; - Vector128 values0 = Vector128.Create(value0); - Vector128 values1 = Vector128.Create(value1); - // First time this checks against 0 and we will move into final compare if it fails. 
- while (lengthToExamine > offset) - { - search = LoadVector128(ref searchSpace, offset); - - matches = AdvSimd.Or( - AdvSimd.CompareEqual(values0, search), - AdvSimd.CompareEqual(values1, search)); - - if (matches == Vector128.Zero) - { - offset += (nuint)Vector128.Count; - continue; - } - - // Find bitflag offset of first match and add to current offset - offset += FindFirstMatchedLane(matches); - - goto Found; - } - - // Move to Vector length from end for final compare - search = LoadVector128(ref searchSpace, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = AdvSimd.Or( - AdvSimd.CompareEqual(values0, search), - AdvSimd.CompareEqual(values1, search)); - - if (matches == Vector128.Zero) - { - // None matched - goto NotFound; - } - - // Find bitflag offset of first match and add to current offset - offset += FindFirstMatchedLane(matches); - - goto Found; - } - else if (Vector.IsHardwareAccelerated) - { - Vector values0 = new Vector(value0); - Vector values1 = new Vector(value1); - - Vector search; - // First time this checks against 0 and we will move into final compare if it fails. 
- while (lengthToExamine > offset) - { - search = LoadVector(ref searchSpace, offset); - search = Vector.BitwiseOr( - Vector.Equals(search, values0), - Vector.Equals(search, values1)); - if (Vector.Zero.Equals(search)) - { - // None matched - offset += (nuint)Vector.Count; - continue; - } - - goto VectorMatch; - } - - // Move to Vector length from end for final compare - search = LoadVector(ref searchSpace, lengthToExamine); - offset = lengthToExamine; - search = Vector.BitwiseOr( - Vector.Equals(search, values0), - Vector.Equals(search, values1)); - if (Vector.Zero.Equals(search)) - { - // None matched - goto NotFound; - } - - VectorMatch: - offset += (nuint)LocateFirstFoundByte(search); - goto Found; - } - - Debug.Fail("Unreachable"); - goto NotFound; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static unsafe int IndexOfAnyValueType(ref short searchSpace, short value0, short value1, int length) - => IndexOfAnyChar(ref Unsafe.As(ref searchSpace), Unsafe.As(ref value0), Unsafe.As(ref value1), length); - - [MethodImpl(MethodImplOptions.AggressiveOptimization)] - internal static unsafe int IndexOfAnyChar(ref char searchStart, char value0, char value1, int length) - { - Debug.Assert(length >= 0); - - nuint offset = 0; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations - nuint lengthToExamine = (nuint)(uint)length; - - if (Sse2.IsSupported) - { - // Calculate lengthToExamine here for test, rather than just testing as it used later, rather than doing it twice. - nint vectorDiff = (nint)length - Vector128.Count; - if (vectorDiff >= 0) - { - // >= Sse2 intrinsics are supported and length is enough to use them, so use that path. - // We jump forward to the intrinsics at the end of them method so a naive branch predict - // will choose the non-intrinsic path so short lengths which don't gain anything aren't - // overly disadvantaged by having to jump over a lot of code. 
Whereas the longer lengths - // more than make this back from the intrinsics. - lengthToExamine = (nuint)vectorDiff; - goto IntrinsicsCompare; - } - } - else if (Vector.IsHardwareAccelerated) - { - // Calculate lengthToExamine here for test, rather than just testing as it used later, rather than doing it twice. - nint vectorDiff = (nint)length - Vector.Count; - if (vectorDiff >= 0) - { - // Similar as above for Vector version - lengthToExamine = (nuint)vectorDiff; - goto VectorCompare; - } - } - - int lookUp; - while (lengthToExamine >= 4) - { - ref char current = ref Add(ref searchStart, offset); - - lookUp = current; - if (value0 == lookUp || value1 == lookUp) - goto Found; - lookUp = Unsafe.Add(ref current, 1); - if (value0 == lookUp || value1 == lookUp) - goto Found1; - lookUp = Unsafe.Add(ref current, 2); - if (value0 == lookUp || value1 == lookUp) - goto Found2; - lookUp = Unsafe.Add(ref current, 3); - if (value0 == lookUp || value1 == lookUp) - goto Found3; - - offset += 4; - lengthToExamine -= 4; - } - - while (lengthToExamine > 0) - { - lookUp = Add(ref searchStart, offset); - if (value0 == lookUp || value1 == lookUp) - goto Found; - - offset += 1; - lengthToExamine -= 1; - } - - NotFound: - return -1; - Found3: - return (int)(offset + 3); - Found2: - return (int)(offset + 2); - Found1: - return (int)(offset + 1); - Found: - return (int)offset; - - IntrinsicsCompare: - // When we move into a Vectorized block, we process everything of Vector size; - // and then for any remainder we do a final compare of Vector size but starting at - // the end and forwards, which may overlap on an earlier compare. - - // We include the Supported check again here even though path will not be taken, so the asm isn't generated if not supported. 
- if (Sse2.IsSupported) - { - int matches; - if (Avx2.IsSupported) - { - Vector256 search; - // Guard as we may only have a valid size for Vector128; when we will move to the Sse2 - // We have already subtracted Vector128.Count from lengthToExamine so compare against that - // to see if we have double the size for Vector256.Count - if (lengthToExamine >= (nuint)Vector128.Count) - { - Vector256 values0 = Vector256.Create((ushort)value0); - Vector256 values1 = Vector256.Create((ushort)value1); - - // Subtract Vector128.Count so we have now subtracted Vector256.Count - lengthToExamine -= (nuint)Vector128.Count; - // First time this checks again against 0, however we will move into final compare if it fails. - while (lengthToExamine > offset) - { - search = LoadVector256(ref searchStart, offset); - // Bitwise Or to combine the flagged matches for the second value to our match flags - matches = Avx2.MoveMask( - Avx2.Or( - Avx2.CompareEqual(values0, search), - Avx2.CompareEqual(values1, search)) - .AsByte()); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. - if (matches == 0) - { - // None matched - offset += (nuint)Vector256.Count; - continue; - } - - goto IntrinsicsMatch; - } - - // Move to Vector length from end for final compare - search = LoadVector256(ref searchStart, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = Avx2.MoveMask( - Avx2.Or( - Avx2.CompareEqual(values0, search), - Avx2.CompareEqual(values1, search)) - .AsByte()); - if (matches == 0) - { - // None matched - goto NotFound; - } - - goto IntrinsicsMatch; - } - } - - // Initial size check was done on method entry. 
- Debug.Assert(length >= Vector128.Count); - { - Vector128 search; - Vector128 values0 = Vector128.Create((ushort)value0); - Vector128 values1 = Vector128.Create((ushort)value1); - // First time this checks against 0 and we will move into final compare if it fails. - while (lengthToExamine > offset) - { - search = LoadVector128(ref searchStart, offset); - - matches = Sse2.MoveMask( - Sse2.Or( - Sse2.CompareEqual(values0, search), - Sse2.CompareEqual(values1, search)) - .AsByte()); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. - if (matches == 0) - { - // None matched - offset += (nuint)Vector128.Count; - continue; - } - - goto IntrinsicsMatch; - } - // Move to Vector length from end for final compare - search = LoadVector128(ref searchStart, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = Sse2.MoveMask( - Sse2.Or( - Sse2.CompareEqual(values0, search), - Sse2.CompareEqual(values1, search)) - .AsByte()); - if (matches == 0) - { - // None matched - goto NotFound; - } - } - - IntrinsicsMatch: - // Find bitflag offset of first difference and add to current offset, - // flags are in bytes so divide by 2 for chars (shift right by 1) - offset += (nuint)(uint)BitOperations.TrailingZeroCount(matches) >> 1; - goto Found; - } - - VectorCompare: - // We include the Supported check again here even though path will not be taken, so the asm isn't generated if not supported. - if (!Sse2.IsSupported && Vector.IsHardwareAccelerated) - { - Vector values0 = new Vector(value0); - Vector values1 = new Vector(value1); - - Vector search; - // First time this checks against 0 and we will move into final compare if it fails. 
- while (lengthToExamine > offset) - { - search = LoadVector(ref searchStart, offset); - search = Vector.BitwiseOr( - Vector.Equals(search, values0), - Vector.Equals(search, values1)); - if (Vector.Zero.Equals(search)) - { - // None matched - offset += (nuint)Vector.Count; - continue; - } - - goto VectorMatch; - } - - // Move to Vector length from end for final compare - search = LoadVector(ref searchStart, lengthToExamine); - offset = lengthToExamine; - search = Vector.BitwiseOr( - Vector.Equals(search, values0), - Vector.Equals(search, values1)); - if (Vector.Zero.Equals(search)) - { - // None matched - goto NotFound; - } - - VectorMatch: - offset += (nuint)(uint)LocateFirstFoundChar(search); - goto Found; - } - - Debug.Fail("Unreachable"); - goto NotFound; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static int IndexOfAnyExceptValueType(ref T searchSpace, T value0, T value1, int length) - => IndexOfAnyExcept(ref searchSpace, value0, value1, length); - - [MethodImpl(MethodImplOptions.AggressiveOptimization)] - internal static int IndexOfAnyValueType(ref byte searchSpace, byte value0, byte value1, byte value2, int length) - { - Debug.Assert(length >= 0); - - uint uValue0 = value0; // Use uint for comparisons to avoid unnecessary 8->32 extensions - uint uValue1 = value1; // Use uint for comparisons to avoid unnecessary 8->32 extensions - uint uValue2 = value2; // Use uint for comparisons to avoid unnecessary 8->32 extensions - nuint offset = 0; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations - nuint lengthToExamine = (nuint)(uint)length; - - if (Sse2.IsSupported || AdvSimd.Arm64.IsSupported) - { - // Avx2 branch also operates on Sse2 sizes, so check is combined. - nint vectorDiff = (nint)length - Vector128.Count; - if (vectorDiff >= 0) - { - // >= Sse2 intrinsics are supported, and length is enough to use them so use that path. 
- // We jump forward to the intrinsics at the end of the method so a naive branch predict - // will choose the non-intrinsic path so short lengths which don't gain anything aren't - // overly disadvantaged by having to jump over a lot of code. Whereas the longer lengths - // more than make this back from the intrinsics. - lengthToExamine = (nuint)vectorDiff; - goto IntrinsicsCompare; - } - } - else if (Vector.IsHardwareAccelerated) - { - // Calculate lengthToExamine here for test, as it is used later - nint vectorDiff = (nint)length - Vector.Count; - if (vectorDiff >= 0) - { - // Similar as above for Vector version - lengthToExamine = (nuint)vectorDiff; - goto IntrinsicsCompare; - } - } - - uint lookUp; - while (lengthToExamine >= 8) - { - lengthToExamine -= 8; - - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found1; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found2; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found3; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 4); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found4; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 5); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found5; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 6); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found6; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 7); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found7; - - offset += 8; - } - - if (lengthToExamine >= 4) - { - lengthToExamine -= 4; 
- - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found1; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found2; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found3; - - offset += 4; - } - - while (lengthToExamine > 0) - { - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found; - - offset += 1; - lengthToExamine -= 1; - } - - NotFound: - return -1; - Found: // Workaround for https://github.com/dotnet/runtime/issues/8795 - return (int)offset; - Found1: - return (int)(offset + 1); - Found2: - return (int)(offset + 2); - Found3: - return (int)(offset + 3); - Found4: - return (int)(offset + 4); - Found5: - return (int)(offset + 5); - Found6: - return (int)(offset + 6); - Found7: - return (int)(offset + 7); - - IntrinsicsCompare: - // When we move into a Vectorized block, we process everything of Vector size; - // and then for any remainder we do a final compare of Vector size but starting at - // the end and forwards, which may overlap on an earlier compare. - - // We include the Supported check again here even though path will not be taken, so the asm isn't generated if not supported. 
- if (Sse2.IsSupported) - { - int matches; - if (Avx2.IsSupported) - { - Vector256 search; - // Guard as we may only have a valid size for Vector128; when we will move to the Sse2 - // We have already subtracted Vector128.Count from lengthToExamine so compare against that - // to see if we have double the size for Vector256.Count - if (lengthToExamine >= (nuint)Vector128.Count) - { - Vector256 values0 = Vector256.Create(value0); - Vector256 values1 = Vector256.Create(value1); - Vector256 values2 = Vector256.Create(value2); - - // Subtract Vector128.Count so we have now subtracted Vector256.Count - lengthToExamine -= (nuint)Vector128.Count; - // First time this checks again against 0, however we will move into final compare if it fails. - while (lengthToExamine > offset) - { - search = LoadVector256(ref searchSpace, offset); - // Bitwise Or to combine the flagged matches for the second value to our match flags - matches = Avx2.MoveMask( - Avx2.Or( - Avx2.Or( - Avx2.CompareEqual(values0, search), - Avx2.CompareEqual(values1, search)), - Avx2.CompareEqual(values2, search))); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. - if (matches == 0) - { - // None matched - offset += (nuint)Vector256.Count; - continue; - } - - goto IntrinsicsMatch; - } - - // Move to Vector length from end for final compare - search = LoadVector256(ref searchSpace, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = Avx2.MoveMask( - Avx2.Or( - Avx2.Or( - Avx2.CompareEqual(values0, search), - Avx2.CompareEqual(values1, search)), - Avx2.CompareEqual(values2, search))); - if (matches == 0) - { - // None matched - goto NotFound; - } - - goto IntrinsicsMatch; - } - } - - // Initial size check was done on method entry. 
- Debug.Assert(length >= Vector128.Count); - { - Vector128 search; - Vector128 values0 = Vector128.Create(value0); - Vector128 values1 = Vector128.Create(value1); - Vector128 values2 = Vector128.Create(value2); - // First time this checks against 0 and we will move into final compare if it fails. - while (lengthToExamine > offset) - { - search = LoadVector128(ref searchSpace, offset); - - matches = Sse2.MoveMask( - Sse2.Or( - Sse2.Or( - Sse2.CompareEqual(values0, search), - Sse2.CompareEqual(values1, search)), - Sse2.CompareEqual(values2, search))); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. - if (matches == 0) - { - // None matched - offset += (nuint)Vector128.Count; - continue; - } - - goto IntrinsicsMatch; - } - // Move to Vector length from end for final compare - search = LoadVector128(ref searchSpace, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = Sse2.MoveMask( - Sse2.Or( - Sse2.Or( - Sse2.CompareEqual(values0, search), - Sse2.CompareEqual(values1, search)), - Sse2.CompareEqual(values2, search))); - if (matches == 0) - { - // None matched - goto NotFound; - } - } - - IntrinsicsMatch: - // Find bitflag offset of first difference and add to current offset - offset += (nuint)BitOperations.TrailingZeroCount(matches); - goto Found; - } - else if (AdvSimd.Arm64.IsSupported) - { - Vector128 search; - Vector128 matches; - Vector128 values0 = Vector128.Create(value0); - Vector128 values1 = Vector128.Create(value1); - Vector128 values2 = Vector128.Create(value2); - // First time this checks against 0 and we will move into final compare if it fails. 
- while (lengthToExamine > offset) - { - search = LoadVector128(ref searchSpace, offset); - - matches = AdvSimd.Or( - AdvSimd.Or( - AdvSimd.CompareEqual(values0, search), - AdvSimd.CompareEqual(values1, search)), - AdvSimd.CompareEqual(values2, search)); - - if (matches == Vector128.Zero) - { - offset += (nuint)Vector128.Count; - continue; - } - - // Find bitflag offset of first match and add to current offset - offset += FindFirstMatchedLane(matches); - - goto Found; - } - - // Move to Vector length from end for final compare - search = LoadVector128(ref searchSpace, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = AdvSimd.Or( - AdvSimd.Or( - AdvSimd.CompareEqual(values0, search), - AdvSimd.CompareEqual(values1, search)), - AdvSimd.CompareEqual(values2, search)); - - if (matches == Vector128.Zero) - { - // None matched - goto NotFound; - } - - // Find bitflag offset of first match and add to current offset - offset += FindFirstMatchedLane(matches); - - goto Found; - } - else if (Vector.IsHardwareAccelerated) - { - Vector values0 = new Vector(value0); - Vector values1 = new Vector(value1); - Vector values2 = new Vector(value2); - - Vector search; - // First time this checks against 0 and we will move into final compare if it fails. 
- while (lengthToExamine > offset) - { - search = LoadVector(ref searchSpace, offset); - search = Vector.BitwiseOr( - Vector.BitwiseOr( - Vector.Equals(search, values0), - Vector.Equals(search, values1)), - Vector.Equals(search, values2)); - if (Vector.Zero.Equals(search)) - { - // None matched - offset += (nuint)Vector.Count; - continue; - } - - goto VectorMatch; - } - - // Move to Vector length from end for final compare - search = LoadVector(ref searchSpace, lengthToExamine); - offset = lengthToExamine; - search = Vector.BitwiseOr( - Vector.BitwiseOr( - Vector.Equals(search, values0), - Vector.Equals(search, values1)), - Vector.Equals(search, values2)); - if (Vector.Zero.Equals(search)) - { - // None matched - goto NotFound; - } - - VectorMatch: - offset += (nuint)LocateFirstFoundByte(search); - goto Found; - } - - Debug.Fail("Unreachable"); - goto NotFound; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static unsafe int IndexOfAnyValueType(ref short searchSpace, short value0, short value1, short value2, int length) - => IndexOfAnyValueType( - ref Unsafe.As(ref searchSpace), - Unsafe.As(ref value0), - Unsafe.As(ref value1), - Unsafe.As(ref value2), - length); - - [MethodImpl(MethodImplOptions.AggressiveOptimization)] - internal static unsafe int IndexOfAnyValueType(ref char searchStart, char value0, char value1, char value2, int length) - { - Debug.Assert(length >= 0); - - nuint offset = 0; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations - nuint lengthToExamine = (nuint)(uint)length; - - if (Sse2.IsSupported) - { - // Calculate lengthToExamine here for test, rather than just testing as it used later, rather than doing it twice. - nint vectorDiff = (nint)length - Vector128.Count; - if (vectorDiff >= 0) - { - // >= Sse2 intrinsics are supported and length is enough to use them, so use that path. 
- // We jump forward to the intrinsics at the end of them method so a naive branch predict - // will choose the non-intrinsic path so short lengths which don't gain anything aren't - // overly disadvantaged by having to jump over a lot of code. Whereas the longer lengths - // more than make this back from the intrinsics. - lengthToExamine = (nuint)vectorDiff; - goto IntrinsicsCompare; - } - } - else if (Vector.IsHardwareAccelerated) - { - // Calculate lengthToExamine here for test, rather than just testing as it used later, rather than doing it twice. - nint vectorDiff = (nint)length - Vector.Count; - if (vectorDiff >= 0) - { - // Similar as above for Vector version - lengthToExamine = (nuint)vectorDiff; - goto VectorCompare; - } - } - - int lookUp; - while (lengthToExamine >= 4) - { - ref char current = ref Add(ref searchStart, offset); - - lookUp = current; - if (value0 == lookUp || value1 == lookUp || value2 == lookUp) - goto Found; - lookUp = Unsafe.Add(ref current, 1); - if (value0 == lookUp || value1 == lookUp || value2 == lookUp) - goto Found1; - lookUp = Unsafe.Add(ref current, 2); - if (value0 == lookUp || value1 == lookUp || value2 == lookUp) - goto Found2; - lookUp = Unsafe.Add(ref current, 3); - if (value0 == lookUp || value1 == lookUp || value2 == lookUp) - goto Found3; - - offset += 4; - lengthToExamine -= 4; - } - - while (lengthToExamine > 0) - { - lookUp = Add(ref searchStart, offset); - if (value0 == lookUp || value1 == lookUp || value2 == lookUp) - goto Found; - - offset += 1; - lengthToExamine -= 1; - } - - NotFound: - return -1; - Found3: - return (int)(offset + 3); - Found2: - return (int)(offset + 2); - Found1: - return (int)(offset + 1); - Found: - return (int)offset; - - IntrinsicsCompare: - // When we move into a Vectorized block, we process everything of Vector size; - // and then for any remainder we do a final compare of Vector size but starting at - // the end and forwards, which may overlap on an earlier compare. 
- - // We include the Supported check again here even though path will not be taken, so the asm isn't generated if not supported. - if (Sse2.IsSupported) - { - int matches; - if (Avx2.IsSupported) - { - Vector256 search; - // Guard as we may only have a valid size for Vector128; when we will move to the Sse2 - // We have already subtracted Vector128.Count from lengthToExamine so compare against that - // to see if we have double the size for Vector256.Count - if (lengthToExamine >= (nuint)Vector128.Count) - { - Vector256 values0 = Vector256.Create((ushort)value0); - Vector256 values1 = Vector256.Create((ushort)value1); - Vector256 values2 = Vector256.Create((ushort)value2); - - // Subtract Vector128.Count so we have now subtracted Vector256.Count - lengthToExamine -= (nuint)Vector128.Count; - // First time this checks again against 0, however we will move into final compare if it fails. - while (lengthToExamine > offset) - { - search = LoadVector256(ref searchStart, offset); - // Bitwise Or to combine the flagged matches for the second value to our match flags - matches = Avx2.MoveMask( - Avx2.Or( - Avx2.Or( - Avx2.CompareEqual(values0, search), - Avx2.CompareEqual(values1, search)), - Avx2.CompareEqual(values2, search)) - .AsByte()); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. 
- if (matches == 0) - { - // None matched - offset += (nuint)Vector256.Count; - continue; - } - - goto IntrinsicsMatch; - } - - // Move to Vector length from end for final compare - search = LoadVector256(ref searchStart, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = Avx2.MoveMask( - Avx2.Or( - Avx2.Or( - Avx2.CompareEqual(values0, search), - Avx2.CompareEqual(values1, search)), - Avx2.CompareEqual(values2, search)) - .AsByte()); - if (matches == 0) - { - // None matched - goto NotFound; - } - - goto IntrinsicsMatch; - } - } - - // Initial size check was done on method entry. - Debug.Assert(length >= Vector128.Count); - { - Vector128 search; - Vector128 values0 = Vector128.Create((ushort)value0); - Vector128 values1 = Vector128.Create((ushort)value1); - Vector128 values2 = Vector128.Create((ushort)value2); - // First time this checks against 0 and we will move into final compare if it fails. - while (lengthToExamine > offset) - { - search = LoadVector128(ref searchStart, offset); - - matches = Sse2.MoveMask( - Sse2.Or( - Sse2.Or( - Sse2.CompareEqual(values0, search), - Sse2.CompareEqual(values1, search)), - Sse2.CompareEqual(values2, search)) - .AsByte()); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. 
- if (matches == 0) - { - // None matched - offset += (nuint)Vector128.Count; - continue; - } - - goto IntrinsicsMatch; - } - // Move to Vector length from end for final compare - search = LoadVector128(ref searchStart, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = Sse2.MoveMask( - Sse2.Or( - Sse2.Or( - Sse2.CompareEqual(values0, search), - Sse2.CompareEqual(values1, search)), - Sse2.CompareEqual(values2, search)) - .AsByte()); - if (matches == 0) - { - // None matched - goto NotFound; - } - } - - IntrinsicsMatch: - // Find bitflag offset of first difference and add to current offset, - // flags are in bytes so divide by 2 for chars (shift right by 1) - offset += (nuint)(uint)BitOperations.TrailingZeroCount(matches) >> 1; - goto Found; - } - - VectorCompare: - // We include the Supported check again here even though path will not be taken, so the asm isn't generated if not supported. - if (!Sse2.IsSupported && Vector.IsHardwareAccelerated) - { - Vector values0 = new Vector(value0); - Vector values1 = new Vector(value1); - Vector values2 = new Vector(value2); - - Vector search; - // First time this checks against 0 and we will move into final compare if it fails. 
- while (lengthToExamine > offset) - { - search = LoadVector(ref searchStart, offset); - search = Vector.BitwiseOr( - Vector.BitwiseOr( - Vector.Equals(search, values0), - Vector.Equals(search, values1)), - Vector.Equals(search, values2)); - if (Vector.Zero.Equals(search)) - { - // None matched - offset += (nuint)Vector.Count; - continue; - } - - goto VectorMatch; - } - - // Move to Vector length from end for final compare - search = LoadVector(ref searchStart, lengthToExamine); - offset = lengthToExamine; - search = Vector.BitwiseOr( - Vector.BitwiseOr( - Vector.Equals(search, values0), - Vector.Equals(search, values1)), - Vector.Equals(search, values2)); - if (Vector.Zero.Equals(search)) - { - // None matched - goto NotFound; - } - - VectorMatch: - offset += (nuint)(uint)LocateFirstFoundChar(search); - goto Found; - } - - Debug.Fail("Unreachable"); - goto NotFound; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static int IndexOfAnyExceptValueType(ref T searchSpace, T value0, T value1, T value2, int length) - => IndexOfAnyExcept(ref searchSpace, value0, value1, value2, length); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static unsafe int IndexOfAnyValueType(ref short searchSpace, short value0, short value1, short value2, short value3, int length) - => IndexOfAnyValueType( - ref Unsafe.As(ref searchSpace), - Unsafe.As(ref value0), - Unsafe.As(ref value1), - Unsafe.As(ref value2), - Unsafe.As(ref value3), - length); - - [MethodImpl(MethodImplOptions.AggressiveOptimization)] - internal static unsafe int IndexOfAnyValueType(ref char searchStart, char value0, char value1, char value2, char value3, int length) - { - Debug.Assert(length >= 0); - - nuint offset = 0; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations - nuint lengthToExamine = (nuint)(uint)length; - - if (Sse2.IsSupported) - { - // Calculate lengthToExamine here for test, rather than just testing as it used later, rather than doing it 
twice. - nint vectorDiff = (nint)length - Vector128.Count; - if (vectorDiff >= 0) - { - // >= Sse2 intrinsics are supported and length is enough to use them, so use that path. - // We jump forward to the intrinsics at the end of them method so a naive branch predict - // will choose the non-intrinsic path so short lengths which don't gain anything aren't - // overly disadvantaged by having to jump over a lot of code. Whereas the longer lengths - // more than make this back from the intrinsics. - lengthToExamine = (nuint)vectorDiff; - goto IntrinsicsCompare; - } - } - else if (Vector.IsHardwareAccelerated) - { - // Calculate lengthToExamine here for test, rather than just testing as it used later, rather than doing it twice. - nint vectorDiff = (nint)length - Vector.Count; - if (vectorDiff >= 0) - { - // Similar as above for Vector version - lengthToExamine = (nuint)vectorDiff; - goto VectorCompare; - } - } - - int lookUp; - while (lengthToExamine >= 4) - { - ref char current = ref Add(ref searchStart, offset); - - lookUp = current; - if (value0 == lookUp || value1 == lookUp || value2 == lookUp || value3 == lookUp) - goto Found; - lookUp = Unsafe.Add(ref current, 1); - if (value0 == lookUp || value1 == lookUp || value2 == lookUp || value3 == lookUp) - goto Found1; - lookUp = Unsafe.Add(ref current, 2); - if (value0 == lookUp || value1 == lookUp || value2 == lookUp || value3 == lookUp) - goto Found2; - lookUp = Unsafe.Add(ref current, 3); - if (value0 == lookUp || value1 == lookUp || value2 == lookUp || value3 == lookUp) - goto Found3; - - offset += 4; - lengthToExamine -= 4; - } - - while (lengthToExamine > 0) - { - lookUp = Add(ref searchStart, offset); - if (value0 == lookUp || value1 == lookUp || value2 == lookUp || value3 == lookUp) - goto Found; - - offset += 1; - lengthToExamine -= 1; - } - - NotFound: - return -1; - Found3: - return (int)(offset + 3); - Found2: - return (int)(offset + 2); - Found1: - return (int)(offset + 1); - Found: - return (int)offset; - 
- IntrinsicsCompare: - // When we move into a Vectorized block, we process everything of Vector size; - // and then for any remainder we do a final compare of Vector size but starting at - // the end and forwards, which may overlap on an earlier compare. - - // We include the Supported check again here even though path will not be taken, so the asm isn't generated if not supported. - if (Sse2.IsSupported) - { - int matches; - if (Avx2.IsSupported) - { - Vector256 search; - // Guard as we may only have a valid size for Vector128; when we will move to the Sse2 - // We have already subtracted Vector128.Count from lengthToExamine so compare against that - // to see if we have double the size for Vector256.Count - if (lengthToExamine >= (nuint)Vector128.Count) - { - Vector256 values0 = Vector256.Create((ushort)value0); - Vector256 values1 = Vector256.Create((ushort)value1); - Vector256 values2 = Vector256.Create((ushort)value2); - Vector256 values3 = Vector256.Create((ushort)value3); - - // Subtract Vector128.Count so we have now subtracted Vector256.Count - lengthToExamine -= (nuint)Vector128.Count; - // First time this checks again against 0, however we will move into final compare if it fails. - while (lengthToExamine > offset) - { - search = LoadVector256(ref searchStart, offset); - // We preform the Or at non-Vector level as we are using the maximum number of non-preserved registers, - // and more causes them first to be pushed to stack and then popped on exit to preseve their values. 
- matches = Avx2.MoveMask(Avx2.CompareEqual(values0, search).AsByte()); - // Bitwise Or to combine the flagged matches for the second, third and fourth values to our match flags - matches |= Avx2.MoveMask(Avx2.CompareEqual(values1, search).AsByte()); - matches |= Avx2.MoveMask(Avx2.CompareEqual(values2, search).AsByte()); - matches |= Avx2.MoveMask(Avx2.CompareEqual(values3, search).AsByte()); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. - if (matches == 0) - { - // None matched - offset += (nuint)Vector256.Count; - continue; - } - - goto IntrinsicsMatch; - } - - // Move to Vector length from end for final compare - search = LoadVector256(ref searchStart, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = Avx2.MoveMask(Avx2.CompareEqual(values0, search).AsByte()); - // Bitwise Or to combine the flagged matches for the second, third and fourth values to our match flags - matches |= Avx2.MoveMask(Avx2.CompareEqual(values1, search).AsByte()); - matches |= Avx2.MoveMask(Avx2.CompareEqual(values2, search).AsByte()); - matches |= Avx2.MoveMask(Avx2.CompareEqual(values3, search).AsByte()); - if (matches == 0) - { - // None matched - goto NotFound; - } - - goto IntrinsicsMatch; - } - } - - // Initial size check was done on method entry. - Debug.Assert(length >= Vector128.Count); - { - Vector128 search; - Vector128 values0 = Vector128.Create((ushort)value0); - Vector128 values1 = Vector128.Create((ushort)value1); - Vector128 values2 = Vector128.Create((ushort)value2); - Vector128 values3 = Vector128.Create((ushort)value3); - // First time this checks against 0 and we will move into final compare if it fails. 
- while (lengthToExamine > offset) - { - search = LoadVector128(ref searchStart, offset); - - matches = Sse2.MoveMask(Sse2.CompareEqual(values0, search).AsByte()); - matches |= Sse2.MoveMask(Sse2.CompareEqual(values1, search).AsByte()); - matches |= Sse2.MoveMask(Sse2.CompareEqual(values2, search).AsByte()); - matches |= Sse2.MoveMask(Sse2.CompareEqual(values3, search).AsByte()); - // Note that MoveMask has converted the equal vector elements into a set of bit flags, - // So the bit position in 'matches' corresponds to the element offset. - if (matches == 0) - { - // None matched - offset += (nuint)Vector128.Count; - continue; - } - - goto IntrinsicsMatch; - } - // Move to Vector length from end for final compare - search = LoadVector128(ref searchStart, lengthToExamine); - offset = lengthToExamine; - // Same as method as above - matches = Sse2.MoveMask(Sse2.CompareEqual(values0, search).AsByte()); - matches |= Sse2.MoveMask(Sse2.CompareEqual(values1, search).AsByte()); - matches |= Sse2.MoveMask(Sse2.CompareEqual(values2, search).AsByte()); - matches |= Sse2.MoveMask(Sse2.CompareEqual(values3, search).AsByte()); - if (matches == 0) - { - // None matched - goto NotFound; - } - } - - IntrinsicsMatch: - // Find bitflag offset of first difference and add to current offset, - // flags are in bytes so divide by 2 for chars (shift right by 1) - offset += (nuint)(uint)BitOperations.TrailingZeroCount(matches) >> 1; - goto Found; - } - - VectorCompare: - // We include the Supported check again here even though path will not be taken, so the asm isn't generated if not supported. - if (!Sse2.IsSupported && Vector.IsHardwareAccelerated) - { - Vector values0 = new Vector(value0); - Vector values1 = new Vector(value1); - Vector values2 = new Vector(value2); - Vector values3 = new Vector(value3); - - Vector search; - // First time this checks against 0 and we will move into final compare if it fails. 
- while (lengthToExamine > offset) - { - search = LoadVector(ref searchStart, offset); - search = Vector.BitwiseOr( - Vector.BitwiseOr( - Vector.BitwiseOr( - Vector.Equals(search, values0), - Vector.Equals(search, values1)), - Vector.Equals(search, values2)), - Vector.Equals(search, values3)); - if (Vector.Zero.Equals(search)) - { - // None matched - offset += (nuint)Vector.Count; - continue; - } - - goto VectorMatch; - } - - // Move to Vector length from end for final compare - search = LoadVector(ref searchStart, lengthToExamine); - offset = lengthToExamine; - search = Vector.BitwiseOr( - Vector.BitwiseOr( - Vector.BitwiseOr( - Vector.Equals(search, values0), - Vector.Equals(search, values1)), - Vector.Equals(search, values2)), - Vector.Equals(search, values3)); - if (Vector.Zero.Equals(search)) - { - // None matched - goto NotFound; - } - - VectorMatch: - offset += (nuint)(uint)LocateFirstFoundChar(search); - goto Found; - } - - Debug.Fail("Unreachable"); - goto NotFound; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static int IndexOfAnyExceptValueType(ref T searchSpace, T value0, T value1, T value2, T value3, int length) - => IndexOfAnyExcept(ref searchSpace, value0, value1, value2, value3, length); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static int LastIndexOfAnyExceptValueType(ref T searchSpace, T value, int length) - => LastIndexOfAnyExcept(ref searchSpace, value, length); - - internal static int LastIndexOfAnyValueType(ref byte searchSpace, byte value0, byte value1, int length) - { - Debug.Assert(length >= 0); - - uint uValue0 = value0; // Use uint for comparisons to avoid unnecessary 8->32 extensions - uint uValue1 = value1; - nuint offset = (nuint)(uint)length; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations - nuint lengthToExamine = (nuint)(uint)length; - - if (Vector.IsHardwareAccelerated && length >= Vector.Count * 2) - { - lengthToExamine = UnalignedCountVectorFromEnd(ref 
searchSpace, length); - } - SequentialScan: - uint lookUp; - while (lengthToExamine >= 8) - { - lengthToExamine -= 8; - offset -= 8; - - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 7); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found7; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 6); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found6; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 5); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found5; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 4); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found4; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found3; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found2; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found1; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found; - } - - if (lengthToExamine >= 4) - { - lengthToExamine -= 4; - offset -= 4; - - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found3; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found2; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found1; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found; - } - - while (lengthToExamine > 0) - { - lengthToExamine -= 1; - offset -= 1; - - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); - if (uValue0 == lookUp || uValue1 == lookUp) - goto Found; - } - - if (Vector.IsHardwareAccelerated && (offset > 0)) - { - lengthToExamine = (offset & (nuint)~(Vector.Count - 1)); 
- - Vector values0 = new Vector(value0); - Vector values1 = new Vector(value1); - - while (lengthToExamine > (nuint)(Vector.Count - 1)) - { - Vector search = LoadVector(ref searchSpace, offset - (nuint)Vector.Count); - var matches = Vector.BitwiseOr( - Vector.Equals(search, values0), - Vector.Equals(search, values1)); - if (Vector.Zero.Equals(matches)) - { - offset -= (nuint)Vector.Count; - lengthToExamine -= (nuint)Vector.Count; - continue; - } - - // Find offset of first match and add to current offset - return (int)(offset) - Vector.Count + LocateLastFoundByte(matches); - } - - if (offset > 0) - { - lengthToExamine = offset; - goto SequentialScan; - } - } - return -1; - Found: // Workaround for https://github.com/dotnet/runtime/issues/8795 - return (int)offset; - Found1: - return (int)(offset + 1); - Found2: - return (int)(offset + 2); - Found3: - return (int)(offset + 3); - Found4: - return (int)(offset + 4); - Found5: - return (int)(offset + 5); - Found6: - return (int)(offset + 6); - Found7: - return (int)(offset + 7); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static int LastIndexOfAnyValueType(ref short searchSpace, short value0, short value1, int length) - => LastIndexOfAny(ref searchSpace, value0, value1, length); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static int LastIndexOfAnyExceptValueType(ref T searchSpace, T value0, T value1, int length) - => LastIndexOfAnyExcept(ref searchSpace, value0, value1, length); - - internal static int LastIndexOfAnyValueType(ref byte searchSpace, byte value0, byte value1, byte value2, int length) - { - Debug.Assert(length >= 0); - - uint uValue0 = value0; // Use uint for comparisons to avoid unnecessary 8->32 extensions - uint uValue1 = value1; - uint uValue2 = value2; - nuint offset = (nuint)(uint)length; // Use nuint for arithmetic to avoid unnecessary 64->32->64 truncations - nuint lengthToExamine = (nuint)(uint)length; - - if (Vector.IsHardwareAccelerated && length 
>= Vector.Count * 2) - { - lengthToExamine = UnalignedCountVectorFromEnd(ref searchSpace, length); - } - SequentialScan: - uint lookUp; - while (lengthToExamine >= 8) - { - lengthToExamine -= 8; - offset -= 8; - - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 7); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found7; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 6); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found6; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 5); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found5; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 4); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found4; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found3; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found2; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found1; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found; - } - - if (lengthToExamine >= 4) - { - lengthToExamine -= 4; - offset -= 4; - - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 3); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found3; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 2); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found2; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset + 1); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found1; - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - 
goto Found; - } - - while (lengthToExamine > 0) - { - lengthToExamine -= 1; - offset -= 1; - - lookUp = Unsafe.AddByteOffset(ref searchSpace, offset); - if (uValue0 == lookUp || uValue1 == lookUp || uValue2 == lookUp) - goto Found; - } - - if (Vector.IsHardwareAccelerated && (offset > 0)) - { - lengthToExamine = (offset & (nuint)~(Vector.Count - 1)); - - Vector values0 = new Vector(value0); - Vector values1 = new Vector(value1); - Vector values2 = new Vector(value2); - - while (lengthToExamine > (nuint)(Vector.Count - 1)) - { - Vector search = LoadVector(ref searchSpace, offset - (nuint)Vector.Count); - - var matches = Vector.BitwiseOr( - Vector.BitwiseOr( - Vector.Equals(search, values0), - Vector.Equals(search, values1)), - Vector.Equals(search, values2)); - - if (Vector.Zero.Equals(matches)) - { - offset -= (nuint)Vector.Count; - lengthToExamine -= (nuint)Vector.Count; - continue; - } - - // Find offset of first match and add to current offset - return (int)(offset) - Vector.Count + LocateLastFoundByte(matches); - } - - if (offset > 0) - { - lengthToExamine = offset; - goto SequentialScan; - } - } - return -1; - Found: // Workaround for https://github.com/dotnet/runtime/issues/8795 - return (int)offset; - Found1: - return (int)(offset + 1); - Found2: - return (int)(offset + 2); - Found3: - return (int)(offset + 3); - Found4: - return (int)(offset + 4); - Found5: - return (int)(offset + 5); - Found6: - return (int)(offset + 6); - Found7: - return (int)(offset + 7); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static int LastIndexOfAnyValueType(ref short searchSpace, short value0, short value1, short value2, int length) - => LastIndexOfAny(ref searchSpace, value0, value1, value2, length); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static int LastIndexOfAnyExceptValueType(ref T searchSpace, T value0, T value1, T value2, int length) - => LastIndexOfAnyExcept(ref searchSpace, value0, value1, value2, length); - - 
[MethodImpl(MethodImplOptions.AggressiveInlining)] - internal static int LastIndexOfAnyExceptValueType(ref T searchSpace, T value0, T value1, T value2, T value3, int length) - => LastIndexOfAnyExcept(ref searchSpace, value0, value1, value2, value3, length); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector128 LoadVector128(ref char start, nint offset) - => Unsafe.ReadUnaligned>(ref Unsafe.As(ref Unsafe.Add(ref start, offset))); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector128 LoadVector128(ref char start, nuint offset) - => Unsafe.ReadUnaligned>(ref Unsafe.As(ref Unsafe.Add(ref start, (nint)offset))); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector256 LoadVector256(ref char start, nint offset) - => Unsafe.ReadUnaligned>(ref Unsafe.As(ref Unsafe.Add(ref start, offset))); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static Vector256 LoadVector256(ref char start, nuint offset) - => Unsafe.ReadUnaligned>(ref Unsafe.As(ref Unsafe.Add(ref start, (nint)offset))); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static ref char Add(ref char start, nuint offset) => ref Unsafe.Add(ref start, (nint)offset); - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static uint FindFirstMatchedLane(Vector128 compareResult) - { - Debug.Assert(AdvSimd.Arm64.IsSupported); - - // Mask to help find the first lane in compareResult that is set. - // MSB 0x10 corresponds to 1st lane, 0x01 corresponds to 0th lane and so forth. - Vector128 mask = Vector128.Create((ushort)0x1001).AsByte(); - - // Find the first lane that is set inside compareResult. 
- Vector128 maskedSelectedLanes = AdvSimd.And(compareResult, mask); - Vector128 pairwiseSelectedLane = AdvSimd.Arm64.AddPairwise(maskedSelectedLanes, maskedSelectedLanes); - ulong selectedLanes = pairwiseSelectedLane.AsUInt64().ToScalar(); - - // It should be handled by compareResult != Vector.Zero - Debug.Assert(selectedLanes != 0); - - // Find the first lane that is set inside compareResult. - return (uint)BitOperations.TrailingZeroCount(selectedLanes) >> 2; - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static int FindFirstMatchedLane(Vector128 compareResult) - { - Debug.Assert(AdvSimd.Arm64.IsSupported); - - Vector128 pairwiseSelectedLane = AdvSimd.Arm64.AddPairwise(compareResult.AsByte(), compareResult.AsByte()); - ulong selectedLanes = pairwiseSelectedLane.AsUInt64().ToScalar(); - - // It should be handled by compareResult != Vector.Zero - Debug.Assert(selectedLanes != 0); - - return BitOperations.TrailingZeroCount(selectedLanes) >> 3; - } - - // Vector sub-search adapted from https://github.com/aspnet/KestrelHttpServer/pull/1138 - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static int LocateLastFoundChar(Vector match) - { - var vector64 = Vector.AsVectorUInt64(match); - ulong candidate = 0; - int i = Vector.Count - 1; - - // This pattern is only unrolled by the Jit if the limit is Vector.Count - // As such, we need a dummy iteration variable for that condition to be satisfied - for (int j = 0; j < Vector.Count; j++) - { - candidate = vector64[i]; - if (candidate != 0) - { - break; - } - - i--; - } - - // Single LEA instruction with jitted const (using function result) - return i * 4 + LocateLastFoundChar(candidate); - } - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static int LocateLastFoundChar(ulong match) - => BitOperations.Log2(match) >> 4; - - [MethodImpl(MethodImplOptions.AggressiveInlining)] - private static unsafe nuint UnalignedCountVectorFromEnd(ref byte searchSpace, int length) - { - nint 
unaligned = (nint)Unsafe.AsPointer(ref searchSpace) & (Vector.Count - 1); - return (nuint)(uint)(((length & (Vector.Count - 1)) + unaligned) & (Vector.Count - 1)); - } - } -} diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs index ea734fd5e289ad..a3f8ae589c7270 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs @@ -1444,7 +1444,6 @@ internal static bool ContainsValueType(ref T searchSpace, T value, int length return false; } -#if !MONO [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int IndexOfChar(ref char searchSpace, char value, int length) => IndexOfValueType(ref Unsafe.As(ref searchSpace), (short)value, length); @@ -1456,7 +1455,6 @@ internal static int IndexOfValueType(ref T searchSpace, T value, int length) [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int IndexOfAnyExceptValueType(ref T searchSpace, T value, int length) where T : struct, INumber => IndexOfValueType>(ref searchSpace, value, length); -#endif [MethodImpl(MethodImplOptions.AggressiveOptimization)] private static int IndexOfValueType(ref TValue searchSpace, TValue value, int length) @@ -1571,7 +1569,6 @@ private static int IndexOfValueType(ref TValue searchSpace, TV return -1; } -#if !MONO [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int IndexOfAnyChar(ref char searchSpace, char value0, char value1, int length) => IndexOfAnyValueType(ref Unsafe.As(ref searchSpace), (short)value0, (short)value1, length); @@ -1583,7 +1580,6 @@ internal static int IndexOfAnyValueType(ref T searchSpace, T value0, T value1 [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int IndexOfAnyExceptValueType(ref T searchSpace, T value0, T value1, int length) where T : struct, INumber => IndexOfAnyValueType>(ref searchSpace, value0, value1, length); -#endif 
// having INumber constraint here allows to use == operator and get better perf compared to .Equals [MethodImpl(MethodImplOptions.AggressiveOptimization)] @@ -1722,7 +1718,6 @@ private static int IndexOfAnyValueType(ref TValue searchSpace, return -1; } -#if !MONO [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int IndexOfAnyValueType(ref T searchSpace, T value0, T value1, T value2, int length) where T : struct, INumber => IndexOfAnyValueType>(ref searchSpace, value0, value1, value2, length); @@ -1730,7 +1725,6 @@ internal static int IndexOfAnyValueType(ref T searchSpace, T value0, T value1 [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int IndexOfAnyExceptValueType(ref T searchSpace, T value0, T value1, T value2, int length) where T : struct, INumber => IndexOfAnyValueType>(ref searchSpace, value0, value1, value2, length); -#endif [MethodImpl(MethodImplOptions.AggressiveOptimization)] private static int IndexOfAnyValueType(ref TValue searchSpace, TValue value0, TValue value1, TValue value2, int length) @@ -1868,7 +1862,6 @@ private static int IndexOfAnyValueType(ref TValue searchSpace, return -1; } -#if !MONO [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int IndexOfAnyValueType(ref T searchSpace, T value0, T value1, T value2, T value3, int length) where T : struct, INumber => IndexOfAnyValueType>(ref searchSpace, value0, value1, value2, value3, length); @@ -1876,7 +1869,6 @@ internal static int IndexOfAnyValueType(ref T searchSpace, T value0, T value1 [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int IndexOfAnyExceptValueType(ref T searchSpace, T value0, T value1, T value2, T value3, int length) where T : struct, INumber => IndexOfAnyValueType>(ref searchSpace, value0, value1, value2, value3, length); -#endif [MethodImpl(MethodImplOptions.AggressiveOptimization)] private static int IndexOfAnyValueType(ref TValue searchSpace, TValue value0, TValue value1, TValue value2, TValue value3, 
int length) @@ -2103,7 +2095,6 @@ internal static int IndexOfAnyValueType(ref T searchSpace, T value0, T value1 return -1; } -#if !MONO [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int LastIndexOfValueType(ref T searchSpace, T value, int length) where T : struct, INumber => LastIndexOfValueType>(ref searchSpace, value, length); @@ -2111,7 +2102,6 @@ internal static int LastIndexOfValueType(ref T searchSpace, T value, int leng [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int LastIndexOfAnyExceptValueType(ref T searchSpace, T value, int length) where T : struct, INumber => LastIndexOfValueType>(ref searchSpace, value, length); -#endif [MethodImpl(MethodImplOptions.AggressiveOptimization)] private static int LastIndexOfValueType(ref TValue searchSpace, TValue value, int length) @@ -2223,7 +2213,6 @@ private static int LastIndexOfValueType(ref TValue searchSpace return -1; } -#if !MONO [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int LastIndexOfAnyValueType(ref T searchSpace, T value0, T value1, int length) where T : struct, INumber => LastIndexOfAnyValueType>(ref searchSpace, value0, value1, length); @@ -2231,7 +2220,6 @@ internal static int LastIndexOfAnyValueType(ref T searchSpace, T value0, T va [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int LastIndexOfAnyExceptValueType(ref T searchSpace, T value0, T value1, int length) where T : struct, INumber => LastIndexOfAnyValueType>(ref searchSpace, value0, value1, length); -#endif [MethodImpl(MethodImplOptions.AggressiveOptimization)] private static int LastIndexOfAnyValueType(ref TValue searchSpace, TValue value0, TValue value1, int length) @@ -2364,7 +2352,6 @@ private static int LastIndexOfAnyValueType(ref TValue searchSp return -1; } -#if !MONO [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int LastIndexOfAnyValueType(ref T searchSpace, T value0, T value1, T value2, int length) where T : struct, INumber => 
LastIndexOfAnyValueType>(ref searchSpace, value0, value1, value2, length); @@ -2372,7 +2359,6 @@ internal static int LastIndexOfAnyValueType(ref T searchSpace, T value0, T va [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int LastIndexOfAnyExceptValueType(ref T searchSpace, T value0, T value1, T value2, int length) where T : struct, INumber => LastIndexOfAnyValueType>(ref searchSpace, value0, value1, value2, length); -#endif [MethodImpl(MethodImplOptions.AggressiveOptimization)] private static int LastIndexOfAnyValueType(ref TValue searchSpace, TValue value0, TValue value1, TValue value2, int length) @@ -2506,7 +2492,6 @@ private static int LastIndexOfAnyValueType(ref TValue searchSp return -1; } -#if !MONO [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int LastIndexOfAnyValueType(ref T searchSpace, T value0, T value1, T value2, T value3, int length) where T : struct, INumber => LastIndexOfAnyValueType>(ref searchSpace, value0, value1, value2, value3, length); @@ -2514,7 +2499,6 @@ internal static int LastIndexOfAnyValueType(ref T searchSpace, T value0, T va [MethodImpl(MethodImplOptions.AggressiveInlining)] internal static int LastIndexOfAnyExceptValueType(ref T searchSpace, T value0, T value1, T value2, T value3, int length) where T : struct, INumber => LastIndexOfAnyValueType>(ref searchSpace, value0, value1, value2, value3, length); -#endif [MethodImpl(MethodImplOptions.AggressiveOptimization)] private static int LastIndexOfAnyValueType(ref TValue searchSpace, TValue value0, TValue value1, TValue value2, TValue value3, int length) From 3b73bb57ca376c899a502686878b967a0b8db929 Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Tue, 1 Nov 2022 17:16:10 +0200 Subject: [PATCH 02/12] [System.Span] Optimize hot loops by simplifying offset computation Before, for every single element, the address `address + offset + ct` had to be computed. 
In theory, a smart compiler would be able to reuse `address + offset` value and offset it only by a constant to obtain every single element. Do this explicitly to avoid reliance on advanced optimizations. --- .../src/System/SpanHelpers.Byte.cs | 36 +- .../src/System/SpanHelpers.T.cs | 343 ++++++++++-------- 2 files changed, 205 insertions(+), 174 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index 3a316d8f740891..b46e083855d65a 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -364,21 +364,22 @@ internal static unsafe int IndexOfNullByte(ref byte searchSpace) { lengthToExamine -= 8; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset)) + ref byte current = ref Unsafe.AddByteOffset(ref searchSpace, offset); + if (uValue == current) goto Found; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 1)) + if (uValue == Unsafe.AddByteOffset(ref current, 1)) goto Found1; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 2)) + if (uValue == Unsafe.AddByteOffset(ref current, 2)) goto Found2; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 3)) + if (uValue == Unsafe.AddByteOffset(ref current, 3)) goto Found3; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 4)) + if (uValue == Unsafe.AddByteOffset(ref current, 4)) goto Found4; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 5)) + if (uValue == Unsafe.AddByteOffset(ref current, 5)) goto Found5; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 6)) + if (uValue == Unsafe.AddByteOffset(ref current, 6)) goto Found6; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 7)) + if (uValue == Unsafe.AddByteOffset(ref current, 7)) goto Found7; offset += 8; @@ -388,13 +389,14 @@ internal static unsafe int 
IndexOfNullByte(ref byte searchSpace) { lengthToExamine -= 4; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset)) + ref byte current = ref Unsafe.AddByteOffset(ref searchSpace, offset); + if (uValue == current) goto Found; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 1)) + if (uValue == Unsafe.AddByteOffset(ref current, 1)) goto Found1; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 2)) + if (uValue == Unsafe.AddByteOffset(ref current, 2)) goto Found2; - if (uValue == Unsafe.AddByteOffset(ref searchSpace, offset + 3)) + if (uValue == Unsafe.AddByteOffset(ref current, 3)) goto Found3; offset += 4; @@ -985,10 +987,12 @@ public static nuint CommonPrefixLength(ref byte first, ref byte second, nuint le for (; (nint)i <= (nint)length - 4; i += 4) { - if (Unsafe.Add(ref first, i + 0) != Unsafe.Add(ref second, i + 0)) return i + 0; - if (Unsafe.Add(ref first, i + 1) != Unsafe.Add(ref second, i + 1)) return i + 1; - if (Unsafe.Add(ref first, i + 2) != Unsafe.Add(ref second, i + 2)) return i + 2; - if (Unsafe.Add(ref first, i + 3) != Unsafe.Add(ref second, i + 3)) return i + 3; + ref byte currentFirst = ref Unsafe.Add(ref first, i); + ref byte currentSecond = ref Unsafe.Add(ref second, i); + if (currentFirst != currentSecond) return i + 0; + if (Unsafe.Add(ref currentFirst, 1) != Unsafe.Add(ref currentSecond, 1)) return i + 1; + if (Unsafe.Add(ref currentFirst, 2) != Unsafe.Add(ref currentSecond, 2)) return i + 2; + if (Unsafe.Add(ref currentFirst, 3) != Unsafe.Add(ref currentSecond, 3)) return i + 3; } return length; diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs index a3f8ae589c7270..053dcdd81eede7 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs @@ -148,14 +148,15 @@ public static void Fill(ref T refData, nuint numElements, T value) 
nuint stopLoopAtOffset = numElements & ~(nuint)7; do { - Unsafe.Add(ref refData, (nint)i + 0) = value; - Unsafe.Add(ref refData, (nint)i + 1) = value; - Unsafe.Add(ref refData, (nint)i + 2) = value; - Unsafe.Add(ref refData, (nint)i + 3) = value; - Unsafe.Add(ref refData, (nint)i + 4) = value; - Unsafe.Add(ref refData, (nint)i + 5) = value; - Unsafe.Add(ref refData, (nint)i + 6) = value; - Unsafe.Add(ref refData, (nint)i + 7) = value; + ref T current = ref Unsafe.Add (ref refData, (nint)i); + current = value; + Unsafe.Add(ref current, 1) = value; + Unsafe.Add(ref current, 2) = value; + Unsafe.Add(ref current, 3) = value; + Unsafe.Add(ref current, 4) = value; + Unsafe.Add(ref current, 5) = value; + Unsafe.Add(ref current, 6) = value; + Unsafe.Add(ref current, 7) = value; } while ((i += 8) < stopLoopAtOffset); } @@ -163,10 +164,11 @@ public static void Fill(ref T refData, nuint numElements, T value) if ((numElements & 4) != 0) { - Unsafe.Add(ref refData, (nint)i + 0) = value; - Unsafe.Add(ref refData, (nint)i + 1) = value; - Unsafe.Add(ref refData, (nint)i + 2) = value; - Unsafe.Add(ref refData, (nint)i + 3) = value; + ref T current = ref Unsafe.Add (ref refData, (nint)i); + current = value; + Unsafe.Add(ref current, 1) = value; + Unsafe.Add(ref current, 2) = value; + Unsafe.Add(ref current, 3) = value; i += 4; } @@ -174,8 +176,9 @@ public static void Fill(ref T refData, nuint numElements, T value) if ((numElements & 2) != 0) { - Unsafe.Add(ref refData, (nint)i + 0) = value; - Unsafe.Add(ref refData, (nint)i + 1) = value; + ref T current = ref Unsafe.Add (ref refData, (nint)i); + current = value; + Unsafe.Add(ref current, 1) = value; i += 2; } @@ -237,14 +240,15 @@ public static unsafe bool Contains(ref T searchSpace, T value, int length) wh { length -= 8; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 0)) || - value.Equals(Unsafe.Add(ref searchSpace, index + 1)) || - value.Equals(Unsafe.Add(ref searchSpace, index + 2)) || - value.Equals(Unsafe.Add(ref 
searchSpace, index + 3)) || - value.Equals(Unsafe.Add(ref searchSpace, index + 4)) || - value.Equals(Unsafe.Add(ref searchSpace, index + 5)) || - value.Equals(Unsafe.Add(ref searchSpace, index + 6)) || - value.Equals(Unsafe.Add(ref searchSpace, index + 7))) + ref T current = ref Unsafe.Add (ref searchSpace, index); + if (value.Equals(current) || + value.Equals(Unsafe.Add(ref current, 1)) || + value.Equals(Unsafe.Add(ref current, 2)) || + value.Equals(Unsafe.Add(ref current, 3)) || + value.Equals(Unsafe.Add(ref current, 4)) || + value.Equals(Unsafe.Add(ref current, 5)) || + value.Equals(Unsafe.Add(ref current, 6)) || + value.Equals(Unsafe.Add(ref current, 7))) { goto Found; } @@ -256,10 +260,11 @@ public static unsafe bool Contains(ref T searchSpace, T value, int length) wh { length -= 4; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 0)) || - value.Equals(Unsafe.Add(ref searchSpace, index + 1)) || - value.Equals(Unsafe.Add(ref searchSpace, index + 2)) || - value.Equals(Unsafe.Add(ref searchSpace, index + 3))) + ref T current = ref Unsafe.Add (ref searchSpace, index); + if (value.Equals(current) || + value.Equals(Unsafe.Add(ref current, 1)) || + value.Equals(Unsafe.Add(ref current, 2)) || + value.Equals(Unsafe.Add(ref current, 3))) { goto Found; } @@ -308,21 +313,22 @@ public static unsafe int IndexOf(ref T searchSpace, T value, int length) wher { length -= 8; - if (value.Equals(Unsafe.Add(ref searchSpace, index))) + ref T current = ref Unsafe.Add (ref searchSpace, index); + if (value.Equals(current)) goto Found; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 1))) + if (value.Equals(Unsafe.Add(ref current, 1))) goto Found1; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 2))) + if (value.Equals(Unsafe.Add(ref current, 2))) goto Found2; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 3))) + if (value.Equals(Unsafe.Add(ref current, 3))) goto Found3; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 4))) + if 
(value.Equals(Unsafe.Add(ref current, 4))) goto Found4; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 5))) + if (value.Equals(Unsafe.Add(ref current, 5))) goto Found5; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 6))) + if (value.Equals(Unsafe.Add(ref current, 6))) goto Found6; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 7))) + if (value.Equals(Unsafe.Add(ref current, 7))) goto Found7; index += 8; @@ -332,13 +338,14 @@ public static unsafe int IndexOf(ref T searchSpace, T value, int length) wher { length -= 4; - if (value.Equals(Unsafe.Add(ref searchSpace, index))) + ref T current = ref Unsafe.Add (ref searchSpace, index); + if (value.Equals(current)) goto Found; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 1))) + if (value.Equals(Unsafe.Add(ref current, 1))) goto Found1; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 2))) + if (value.Equals(Unsafe.Add(ref current, 2))) goto Found2; - if (value.Equals(Unsafe.Add(ref searchSpace, index + 3))) + if (value.Equals(Unsafe.Add(ref current, 3))) goto Found3; index += 4; @@ -396,28 +403,29 @@ public static int IndexOfAny(ref T searchSpace, T value0, T value1, int lengt while ((length - index) >= 8) { - lookUp = Unsafe.Add(ref searchSpace, index); + ref T current = ref Unsafe.Add (ref searchSpace, index); + lookUp = current; if (value0.Equals(lookUp) || value1.Equals(lookUp)) goto Found; - lookUp = Unsafe.Add(ref searchSpace, index + 1); + lookUp = Unsafe.Add(ref current, 1); if (value0.Equals(lookUp) || value1.Equals(lookUp)) goto Found1; - lookUp = Unsafe.Add(ref searchSpace, index + 2); + lookUp = Unsafe.Add(ref current, 2); if (value0.Equals(lookUp) || value1.Equals(lookUp)) goto Found2; - lookUp = Unsafe.Add(ref searchSpace, index + 3); + lookUp = Unsafe.Add(ref current, 3); if (value0.Equals(lookUp) || value1.Equals(lookUp)) goto Found3; - lookUp = Unsafe.Add(ref searchSpace, index + 4); + lookUp = Unsafe.Add(ref current, 4); if (value0.Equals(lookUp) || 
value1.Equals(lookUp)) goto Found4; - lookUp = Unsafe.Add(ref searchSpace, index + 5); + lookUp = Unsafe.Add(ref current, 5); if (value0.Equals(lookUp) || value1.Equals(lookUp)) goto Found5; - lookUp = Unsafe.Add(ref searchSpace, index + 6); + lookUp = Unsafe.Add(ref current, 6); if (value0.Equals(lookUp) || value1.Equals(lookUp)) goto Found6; - lookUp = Unsafe.Add(ref searchSpace, index + 7); + lookUp = Unsafe.Add(ref current, 7); if (value0.Equals(lookUp) || value1.Equals(lookUp)) goto Found7; @@ -426,16 +434,17 @@ public static int IndexOfAny(ref T searchSpace, T value0, T value1, int lengt if ((length - index) >= 4) { - lookUp = Unsafe.Add(ref searchSpace, index); + ref T current = ref Unsafe.Add (ref searchSpace, index); + lookUp = current; if (value0.Equals(lookUp) || value1.Equals(lookUp)) goto Found; - lookUp = Unsafe.Add(ref searchSpace, index + 1); + lookUp = Unsafe.Add(ref current, 1); if (value0.Equals(lookUp) || value1.Equals(lookUp)) goto Found1; - lookUp = Unsafe.Add(ref searchSpace, index + 2); + lookUp = Unsafe.Add(ref current, 2); if (value0.Equals(lookUp) || value1.Equals(lookUp)) goto Found2; - lookUp = Unsafe.Add(ref searchSpace, index + 3); + lookUp = Unsafe.Add(ref current, 3); if (value0.Equals(lookUp) || value1.Equals(lookUp)) goto Found3; @@ -502,28 +511,29 @@ public static int IndexOfAny(ref T searchSpace, T value0, T value1, T value2, while ((length - index) >= 8) { - lookUp = Unsafe.Add(ref searchSpace, index); + ref T current = ref Unsafe.Add (ref searchSpace, index); + lookUp = current; if (value0.Equals(lookUp) || value1.Equals(lookUp) || value2.Equals(lookUp)) goto Found; - lookUp = Unsafe.Add(ref searchSpace, index + 1); + lookUp = Unsafe.Add(ref current, 1); if (value0.Equals(lookUp) || value1.Equals(lookUp) || value2.Equals(lookUp)) goto Found1; - lookUp = Unsafe.Add(ref searchSpace, index + 2); + lookUp = Unsafe.Add(ref current, 2); if (value0.Equals(lookUp) || value1.Equals(lookUp) || value2.Equals(lookUp)) goto Found2; - 
lookUp = Unsafe.Add(ref searchSpace, index + 3); + lookUp = Unsafe.Add(ref current, 3); if (value0.Equals(lookUp) || value1.Equals(lookUp) || value2.Equals(lookUp)) goto Found3; - lookUp = Unsafe.Add(ref searchSpace, index + 4); + lookUp = Unsafe.Add(ref current, 4); if (value0.Equals(lookUp) || value1.Equals(lookUp) || value2.Equals(lookUp)) goto Found4; - lookUp = Unsafe.Add(ref searchSpace, index + 5); + lookUp = Unsafe.Add(ref current, 5); if (value0.Equals(lookUp) || value1.Equals(lookUp) || value2.Equals(lookUp)) goto Found5; - lookUp = Unsafe.Add(ref searchSpace, index + 6); + lookUp = Unsafe.Add(ref current, 6); if (value0.Equals(lookUp) || value1.Equals(lookUp) || value2.Equals(lookUp)) goto Found6; - lookUp = Unsafe.Add(ref searchSpace, index + 7); + lookUp = Unsafe.Add(ref current, 7); if (value0.Equals(lookUp) || value1.Equals(lookUp) || value2.Equals(lookUp)) goto Found7; @@ -532,16 +542,17 @@ public static int IndexOfAny(ref T searchSpace, T value0, T value1, T value2, if ((length - index) >= 4) { - lookUp = Unsafe.Add(ref searchSpace, index); + ref T current = ref Unsafe.Add (ref searchSpace, index); + lookUp = current; if (value0.Equals(lookUp) || value1.Equals(lookUp) || value2.Equals(lookUp)) goto Found; - lookUp = Unsafe.Add(ref searchSpace, index + 1); + lookUp = Unsafe.Add(ref current, 1); if (value0.Equals(lookUp) || value1.Equals(lookUp) || value2.Equals(lookUp)) goto Found1; - lookUp = Unsafe.Add(ref searchSpace, index + 2); + lookUp = Unsafe.Add(ref current, 2); if (value0.Equals(lookUp) || value1.Equals(lookUp) || value2.Equals(lookUp)) goto Found2; - lookUp = Unsafe.Add(ref searchSpace, index + 3); + lookUp = Unsafe.Add(ref current, 3); if (value0.Equals(lookUp) || value1.Equals(lookUp) || value2.Equals(lookUp)) goto Found3; @@ -718,21 +729,22 @@ public static int LastIndexOf(ref T searchSpace, T value, int length) where T { length -= 8; - if (value.Equals(Unsafe.Add(ref searchSpace, length + 7))) + ref T current = ref Unsafe.Add (ref 
searchSpace, length); + if (value.Equals(Unsafe.Add(ref current, 7))) goto Found7; - if (value.Equals(Unsafe.Add(ref searchSpace, length + 6))) + if (value.Equals(Unsafe.Add(ref current, 6))) goto Found6; - if (value.Equals(Unsafe.Add(ref searchSpace, length + 5))) + if (value.Equals(Unsafe.Add(ref current, 5))) goto Found5; - if (value.Equals(Unsafe.Add(ref searchSpace, length + 4))) + if (value.Equals(Unsafe.Add(ref current, 4))) goto Found4; - if (value.Equals(Unsafe.Add(ref searchSpace, length + 3))) + if (value.Equals(Unsafe.Add(ref current, 3))) goto Found3; - if (value.Equals(Unsafe.Add(ref searchSpace, length + 2))) + if (value.Equals(Unsafe.Add(ref current, 2))) goto Found2; - if (value.Equals(Unsafe.Add(ref searchSpace, length + 1))) + if (value.Equals(Unsafe.Add(ref current, 1))) goto Found1; - if (value.Equals(Unsafe.Add(ref searchSpace, length))) + if (value.Equals(current)) goto Found; } @@ -740,13 +752,14 @@ public static int LastIndexOf(ref T searchSpace, T value, int length) where T { length -= 4; - if (value.Equals(Unsafe.Add(ref searchSpace, length + 3))) + ref T current = ref Unsafe.Add (ref searchSpace, length); + if (value.Equals(Unsafe.Add(ref current, 3))) goto Found3; - if (value.Equals(Unsafe.Add(ref searchSpace, length + 2))) + if (value.Equals(Unsafe.Add(ref current, 2))) goto Found2; - if (value.Equals(Unsafe.Add(ref searchSpace, length + 1))) + if (value.Equals(Unsafe.Add(ref current, 1))) goto Found1; - if (value.Equals(Unsafe.Add(ref searchSpace, length))) + if (value.Equals(current)) goto Found; } @@ -802,28 +815,29 @@ public static int LastIndexOfAny(ref T searchSpace, T value0, T value1, int l { length -= 8; - lookUp = Unsafe.Add(ref searchSpace, length + 7); + ref T current = ref Unsafe.Add (ref searchSpace, length); + lookUp = Unsafe.Add(ref current, 7); if (value0.Equals(lookUp) || value1.Equals(lookUp)) goto Found7; - lookUp = Unsafe.Add(ref searchSpace, length + 6); + lookUp = Unsafe.Add(ref current, 6); if 
(value0.Equals(lookUp) || value1.Equals(lookUp)) goto Found6; - lookUp = Unsafe.Add(ref searchSpace, length + 5); + lookUp = Unsafe.Add(ref current, 5); if (value0.Equals(lookUp) || value1.Equals(lookUp)) goto Found5; - lookUp = Unsafe.Add(ref searchSpace, length + 4); + lookUp = Unsafe.Add(ref current, 4); if (value0.Equals(lookUp) || value1.Equals(lookUp)) goto Found4; - lookUp = Unsafe.Add(ref searchSpace, length + 3); + lookUp = Unsafe.Add(ref current, 3); if (value0.Equals(lookUp) || value1.Equals(lookUp)) goto Found3; - lookUp = Unsafe.Add(ref searchSpace, length + 2); + lookUp = Unsafe.Add(ref current, 2); if (value0.Equals(lookUp) || value1.Equals(lookUp)) goto Found2; - lookUp = Unsafe.Add(ref searchSpace, length + 1); + lookUp = Unsafe.Add(ref current, 1); if (value0.Equals(lookUp) || value1.Equals(lookUp)) goto Found1; - lookUp = Unsafe.Add(ref searchSpace, length); + lookUp = current; if (value0.Equals(lookUp) || value1.Equals(lookUp)) goto Found; } @@ -832,16 +846,17 @@ public static int LastIndexOfAny(ref T searchSpace, T value0, T value1, int l { length -= 4; - lookUp = Unsafe.Add(ref searchSpace, length + 3); + ref T current = ref Unsafe.Add (ref searchSpace, length); + lookUp = Unsafe.Add(ref current, 3); if (value0.Equals(lookUp) || value1.Equals(lookUp)) goto Found3; - lookUp = Unsafe.Add(ref searchSpace, length + 2); + lookUp = Unsafe.Add(ref current, 2); if (value0.Equals(lookUp) || value1.Equals(lookUp)) goto Found2; - lookUp = Unsafe.Add(ref searchSpace, length + 1); + lookUp = Unsafe.Add(ref current, 1); if (value0.Equals(lookUp) || value1.Equals(lookUp)) goto Found1; - lookUp = Unsafe.Add(ref searchSpace, length); + lookUp = current; if (value0.Equals(lookUp) || value1.Equals(lookUp)) goto Found; } @@ -907,28 +922,29 @@ public static int LastIndexOfAny(ref T searchSpace, T value0, T value1, T val { length -= 8; - lookUp = Unsafe.Add(ref searchSpace, length + 7); + ref T current = ref Unsafe.Add (ref searchSpace, length); + lookUp = 
Unsafe.Add(ref current, 7); if (value0.Equals(lookUp) || value1.Equals(lookUp) || value2.Equals(lookUp)) goto Found7; - lookUp = Unsafe.Add(ref searchSpace, length + 6); + lookUp = Unsafe.Add(ref current, 6); if (value0.Equals(lookUp) || value1.Equals(lookUp) || value2.Equals(lookUp)) goto Found6; - lookUp = Unsafe.Add(ref searchSpace, length + 5); + lookUp = Unsafe.Add(ref current, 5); if (value0.Equals(lookUp) || value1.Equals(lookUp) || value2.Equals(lookUp)) goto Found5; - lookUp = Unsafe.Add(ref searchSpace, length + 4); + lookUp = Unsafe.Add(ref current, 4); if (value0.Equals(lookUp) || value1.Equals(lookUp) || value2.Equals(lookUp)) goto Found4; - lookUp = Unsafe.Add(ref searchSpace, length + 3); + lookUp = Unsafe.Add(ref current, 3); if (value0.Equals(lookUp) || value1.Equals(lookUp) || value2.Equals(lookUp)) goto Found3; - lookUp = Unsafe.Add(ref searchSpace, length + 2); + lookUp = Unsafe.Add(ref current, 2); if (value0.Equals(lookUp) || value1.Equals(lookUp) || value2.Equals(lookUp)) goto Found2; - lookUp = Unsafe.Add(ref searchSpace, length + 1); + lookUp = Unsafe.Add(ref current, 1); if (value0.Equals(lookUp) || value1.Equals(lookUp) || value2.Equals(lookUp)) goto Found1; - lookUp = Unsafe.Add(ref searchSpace, length); + lookUp = current; if (value0.Equals(lookUp) || value1.Equals(lookUp) || value2.Equals(lookUp)) goto Found; } @@ -937,16 +953,17 @@ public static int LastIndexOfAny(ref T searchSpace, T value0, T value1, T val { length -= 4; - lookUp = Unsafe.Add(ref searchSpace, length + 3); + ref T current = ref Unsafe.Add (ref searchSpace, length); + lookUp = Unsafe.Add(ref current, 3); if (value0.Equals(lookUp) || value1.Equals(lookUp) || value2.Equals(lookUp)) goto Found3; - lookUp = Unsafe.Add(ref searchSpace, length + 2); + lookUp = Unsafe.Add(ref current, 2); if (value0.Equals(lookUp) || value1.Equals(lookUp) || value2.Equals(lookUp)) goto Found2; - lookUp = Unsafe.Add(ref searchSpace, length + 1); + lookUp = Unsafe.Add(ref current, 1); if 
(value0.Equals(lookUp) || value1.Equals(lookUp) || value2.Equals(lookUp)) goto Found1; - lookUp = Unsafe.Add(ref searchSpace, length); + lookUp = current; if (value0.Equals(lookUp) || value1.Equals(lookUp) || value2.Equals(lookUp)) goto Found; } @@ -1205,36 +1222,38 @@ public static bool SequenceEqual(ref T first, ref T second, int length) where { length -= 8; - lookUp0 = Unsafe.Add(ref first, index); - lookUp1 = Unsafe.Add(ref second, index); + ref T currentFirst = ref Unsafe.Add(ref first, index); + ref T currentSecond = ref Unsafe.Add(ref second, index); + lookUp0 = currentFirst; + lookUp1 = currentSecond; if (!(lookUp0?.Equals(lookUp1) ?? (object?)lookUp1 is null)) goto NotEqual; - lookUp0 = Unsafe.Add(ref first, index + 1); - lookUp1 = Unsafe.Add(ref second, index + 1); + lookUp0 = Unsafe.Add(ref currentFirst, 1); + lookUp1 = Unsafe.Add(ref currentSecond, 1); if (!(lookUp0?.Equals(lookUp1) ?? (object?)lookUp1 is null)) goto NotEqual; - lookUp0 = Unsafe.Add(ref first, index + 2); - lookUp1 = Unsafe.Add(ref second, index + 2); + lookUp0 = Unsafe.Add(ref currentFirst, 2); + lookUp1 = Unsafe.Add(ref currentSecond, 2); if (!(lookUp0?.Equals(lookUp1) ?? (object?)lookUp1 is null)) goto NotEqual; - lookUp0 = Unsafe.Add(ref first, index + 3); - lookUp1 = Unsafe.Add(ref second, index + 3); + lookUp0 = Unsafe.Add(ref currentFirst, 3); + lookUp1 = Unsafe.Add(ref currentSecond, 3); if (!(lookUp0?.Equals(lookUp1) ?? (object?)lookUp1 is null)) goto NotEqual; - lookUp0 = Unsafe.Add(ref first, index + 4); - lookUp1 = Unsafe.Add(ref second, index + 4); + lookUp0 = Unsafe.Add(ref currentFirst, 4); + lookUp1 = Unsafe.Add(ref currentSecond, 4); if (!(lookUp0?.Equals(lookUp1) ?? (object?)lookUp1 is null)) goto NotEqual; - lookUp0 = Unsafe.Add(ref first, index + 5); - lookUp1 = Unsafe.Add(ref second, index + 5); + lookUp0 = Unsafe.Add(ref currentFirst, 5); + lookUp1 = Unsafe.Add(ref currentSecond, 5); if (!(lookUp0?.Equals(lookUp1) ?? 
(object?)lookUp1 is null)) goto NotEqual; - lookUp0 = Unsafe.Add(ref first, index + 6); - lookUp1 = Unsafe.Add(ref second, index + 6); + lookUp0 = Unsafe.Add(ref currentFirst, 6); + lookUp1 = Unsafe.Add(ref currentSecond, 6); if (!(lookUp0?.Equals(lookUp1) ?? (object?)lookUp1 is null)) goto NotEqual; - lookUp0 = Unsafe.Add(ref first, index + 7); - lookUp1 = Unsafe.Add(ref second, index + 7); + lookUp0 = Unsafe.Add(ref currentFirst, 7); + lookUp1 = Unsafe.Add(ref currentSecond, 7); if (!(lookUp0?.Equals(lookUp1) ?? (object?)lookUp1 is null)) goto NotEqual; @@ -1245,20 +1264,22 @@ public static bool SequenceEqual(ref T first, ref T second, int length) where { length -= 4; - lookUp0 = Unsafe.Add(ref first, index); - lookUp1 = Unsafe.Add(ref second, index); + ref T currentFirst = ref Unsafe.Add(ref first, index); + ref T currentSecond = ref Unsafe.Add(ref second, index); + lookUp0 = currentFirst; + lookUp1 = currentSecond; if (!(lookUp0?.Equals(lookUp1) ?? (object?)lookUp1 is null)) goto NotEqual; - lookUp0 = Unsafe.Add(ref first, index + 1); - lookUp1 = Unsafe.Add(ref second, index + 1); + lookUp0 = Unsafe.Add(ref currentFirst, 1); + lookUp1 = Unsafe.Add(ref currentSecond, 1); if (!(lookUp0?.Equals(lookUp1) ?? (object?)lookUp1 is null)) goto NotEqual; - lookUp0 = Unsafe.Add(ref first, index + 2); - lookUp1 = Unsafe.Add(ref second, index + 2); + lookUp0 = Unsafe.Add(ref currentFirst, 2); + lookUp1 = Unsafe.Add(ref currentSecond, 2); if (!(lookUp0?.Equals(lookUp1) ?? (object?)lookUp1 is null)) goto NotEqual; - lookUp0 = Unsafe.Add(ref first, index + 3); - lookUp1 = Unsafe.Add(ref second, index + 3); + lookUp0 = Unsafe.Add(ref currentFirst, 3); + lookUp1 = Unsafe.Add(ref currentSecond, 3); if (!(lookUp0?.Equals(lookUp1) ?? 
(object?)lookUp1 is null)) goto NotEqual; @@ -1341,14 +1362,15 @@ internal static bool ContainsValueType(ref T searchSpace, T value, int length { length -= 8; - if (Unsafe.Add(ref searchSpace, offset) == value - || Unsafe.Add(ref searchSpace, offset + 1) == value - || Unsafe.Add(ref searchSpace, offset + 2) == value - || Unsafe.Add(ref searchSpace, offset + 3) == value - || Unsafe.Add(ref searchSpace, offset + 4) == value - || Unsafe.Add(ref searchSpace, offset + 5) == value - || Unsafe.Add(ref searchSpace, offset + 6) == value - || Unsafe.Add(ref searchSpace, offset + 7) == value) + ref T current = ref Unsafe.Add (ref searchSpace, offset); + if (current == value + || Unsafe.Add(ref current, 1) == value + || Unsafe.Add(ref current, 2) == value + || Unsafe.Add(ref current, 3) == value + || Unsafe.Add(ref current, 4) == value + || Unsafe.Add(ref current, 5) == value + || Unsafe.Add(ref current, 6) == value + || Unsafe.Add(ref current, 7) == value) { return true; } @@ -1360,10 +1382,11 @@ internal static bool ContainsValueType(ref T searchSpace, T value, int length { length -= 4; - if (Unsafe.Add(ref searchSpace, offset) == value - || Unsafe.Add(ref searchSpace, offset + 1) == value - || Unsafe.Add(ref searchSpace, offset + 2) == value - || Unsafe.Add(ref searchSpace, offset + 3) == value) + ref T current = ref Unsafe.Add (ref searchSpace, offset); + if (current == value + || Unsafe.Add(ref current, 1) == value + || Unsafe.Add(ref current, 2) == value + || Unsafe.Add(ref current, 3) == value) { return true; } @@ -1472,14 +1495,15 @@ private static int IndexOfValueType(ref TValue searchSpace, TV { length -= 8; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset) == value)) return (int)offset; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 1) == value)) return (int)offset + 1; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 2) == value)) return (int)offset + 2; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, 
offset + 3) == value)) return (int)offset + 3; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 4) == value)) return (int)offset + 4; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 5) == value)) return (int)offset + 5; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 6) == value)) return (int)offset + 6; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 7) == value)) return (int)offset + 7; + ref TValue current = ref Unsafe.Add (ref searchSpace, offset); + if (TNegator.NegateIfNeeded(current == value)) return (int)offset; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, 1) == value)) return (int)offset + 1; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, 2) == value)) return (int)offset + 2; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, 3) == value)) return (int)offset + 3; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, 4) == value)) return (int)offset + 4; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, 5) == value)) return (int)offset + 5; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, 6) == value)) return (int)offset + 6; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, 7) == value)) return (int)offset + 7; offset += 8; } @@ -1488,10 +1512,11 @@ private static int IndexOfValueType(ref TValue searchSpace, TV { length -= 4; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset) == value)) return (int)offset; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 1) == value)) return (int)offset + 1; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 2) == value)) return (int)offset + 2; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset + 3) == value)) return (int)offset + 3; + ref TValue current = ref Unsafe.Add (ref searchSpace, offset); + if (TNegator.NegateIfNeeded(current == value)) return (int)offset; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, 1) == value)) return (int)offset + 1; 
+ if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, 2) == value)) return (int)offset + 2; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, 3) == value)) return (int)offset + 3; offset += 4; } @@ -2119,14 +2144,15 @@ private static int LastIndexOfValueType(ref TValue searchSpace { length -= 8; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset) == value)) return (int)offset; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 1) == value)) return (int)offset - 1; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 2) == value)) return (int)offset - 2; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 3) == value)) return (int)offset - 3; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 4) == value)) return (int)offset - 4; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 5) == value)) return (int)offset - 5; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 6) == value)) return (int)offset - 6; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 7) == value)) return (int)offset - 7; + ref TValue current = ref Unsafe.Add(ref searchSpace, offset); + if (TNegator.NegateIfNeeded(current == value)) return (int)offset; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, -1) == value)) return (int)offset - 1; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, -2) == value)) return (int)offset - 2; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, -3) == value)) return (int)offset - 3; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, -4) == value)) return (int)offset - 4; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, -5) == value)) return (int)offset - 5; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, -6) == value)) return (int)offset - 6; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, -7) == value)) return (int)offset - 7; offset -= 8; } @@ -2135,10 +2161,11 @@ private static int LastIndexOfValueType(ref 
TValue searchSpace { length -= 4; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset) == value)) return (int)offset; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 1) == value)) return (int)offset - 1; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 2) == value)) return (int)offset - 2; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset - 3) == value)) return (int)offset - 3; + ref TValue current = ref Unsafe.Add(ref searchSpace, offset); + if (TNegator.NegateIfNeeded(current == value)) return (int)offset; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, -1) == value)) return (int)offset - 1; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, -2) == value)) return (int)offset - 2; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, -3) == value)) return (int)offset - 3; offset -= 4; } From a48fb6fce734981ba19249f9c290371b705c49c7 Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Tue, 1 Nov 2022 19:58:04 +0200 Subject: [PATCH 03/12] [System.Span] Refactor hot loop code This would replace code like ``` load b.neq add ret load b.neq add ret load .... ``` with ``` load b.eq load b.eq load ... ``` This makes the code more compact in the hot loop, reduces overall code size and thus improves performance. This pattern is widely used and it was also used before with Span lookups. 
--- .../src/System/SpanHelpers.Byte.cs | 16 +- .../src/System/SpanHelpers.T.cs | 316 ++++++++++++------ 2 files changed, 235 insertions(+), 97 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index b46e083855d65a..d9a9e441cc11e6 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -989,13 +989,21 @@ public static nuint CommonPrefixLength(ref byte first, ref byte second, nuint le { ref byte currentFirst = ref Unsafe.Add(ref first, i); ref byte currentSecond = ref Unsafe.Add(ref second, i); - if (currentFirst != currentSecond) return i + 0; - if (Unsafe.Add(ref currentFirst, 1) != Unsafe.Add(ref currentSecond, 1)) return i + 1; - if (Unsafe.Add(ref currentFirst, 2) != Unsafe.Add(ref currentSecond, 2)) return i + 2; - if (Unsafe.Add(ref currentFirst, 3) != Unsafe.Add(ref currentSecond, 3)) return i + 3; + if (currentFirst != currentSecond) goto Found0; + if (Unsafe.Add(ref currentFirst, 1) != Unsafe.Add(ref currentSecond, 1)) goto Found1; + if (Unsafe.Add(ref currentFirst, 2) != Unsafe.Add(ref currentSecond, 2)) goto Found2; + if (Unsafe.Add(ref currentFirst, 3) != Unsafe.Add(ref currentSecond, 3)) goto Found3; } return length; + Found0: + return i; + Found1: + return i + 1; + Found2: + return i + 2; + Found3: + return i + 3; } Debug.Assert(length >= (uint)Vector128.Count); diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs index 053dcdd81eede7..31ac7be85c8ebe 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs @@ -1496,14 +1496,14 @@ private static int IndexOfValueType(ref TValue searchSpace, TV length -= 8; ref TValue current = ref Unsafe.Add (ref searchSpace, 
offset); - if (TNegator.NegateIfNeeded(current == value)) return (int)offset; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, 1) == value)) return (int)offset + 1; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, 2) == value)) return (int)offset + 2; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, 3) == value)) return (int)offset + 3; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, 4) == value)) return (int)offset + 4; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, 5) == value)) return (int)offset + 5; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, 6) == value)) return (int)offset + 6; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, 7) == value)) return (int)offset + 7; + if (TNegator.NegateIfNeeded(current == value)) goto Found; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, 1) == value)) goto Found1; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, 2) == value)) goto Found2; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, 3) == value)) goto Found3; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, 4) == value)) goto Found4; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, 5) == value)) goto Found5; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, 6) == value)) goto Found6; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, 7) == value)) goto Found7; offset += 8; } @@ -1513,10 +1513,10 @@ private static int IndexOfValueType(ref TValue searchSpace, TV length -= 4; ref TValue current = ref Unsafe.Add (ref searchSpace, offset); - if (TNegator.NegateIfNeeded(current == value)) return (int)offset; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, 1) == value)) return (int)offset + 1; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, 2) == value)) return (int)offset + 2; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, 3) == value)) return (int)offset + 3; + if (TNegator.NegateIfNeeded(current == value)) goto Found; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, 1) == 
value)) goto Found1; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, 2) == value)) goto Found2; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, 3) == value)) goto Found3; offset += 4; } @@ -1525,10 +1525,27 @@ private static int IndexOfValueType(ref TValue searchSpace, TV { length -= 1; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset) == value)) return (int)offset; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset) == value)) goto Found; offset += 1; } + return -1; + Found7: + return (int)(offset + 7); + Found6: + return (int)(offset + 6); + Found5: + return (int)(offset + 5); + Found4: + return (int)(offset + 4); + Found3: + return (int)(offset + 3); + Found2: + return (int)(offset + 2); + Found1: + return (int)(offset + 1); + Found: + return (int)(offset); } else if (Vector256.IsHardwareAccelerated && length >= Vector256.Count) { @@ -1628,21 +1645,21 @@ private static int IndexOfAnyValueType(ref TValue searchSpace, ref TValue current = ref Unsafe.Add(ref searchSpace, offset); lookUp = current; - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found; lookUp = Unsafe.Add(ref current, 1); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset + 1; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found1; lookUp = Unsafe.Add(ref current, 2); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset + 2; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found2; lookUp = Unsafe.Add(ref current, 3); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset + 3; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found3; lookUp = Unsafe.Add(ref current, 4); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset + 4; + if 
(TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found4; lookUp = Unsafe.Add(ref current, 5); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset + 5; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found5; lookUp = Unsafe.Add(ref current, 6); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset + 6; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found6; lookUp = Unsafe.Add(ref current, 7); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset + 7; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found7; offset += 8; } @@ -1654,13 +1671,13 @@ private static int IndexOfAnyValueType(ref TValue searchSpace, ref TValue current = ref Unsafe.Add(ref searchSpace, offset); lookUp = current; - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found; lookUp = Unsafe.Add(ref current, 1); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset + 1; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found1; lookUp = Unsafe.Add(ref current, 2); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset + 2; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found2; lookUp = Unsafe.Add(ref current, 3); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset + 3; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found3; offset += 4; } @@ -1670,10 +1687,27 @@ private static int IndexOfAnyValueType(ref TValue searchSpace, length -= 1; lookUp = Unsafe.Add(ref searchSpace, offset); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 
|| lookUp == value1)) goto Found; offset += 1; } + return -1; + Found7: + return (int)(offset + 7); + Found6: + return (int)(offset + 6); + Found5: + return (int)(offset + 5); + Found4: + return (int)(offset + 4); + Found3: + return (int)(offset + 3); + Found2: + return (int)(offset + 2); + Found1: + return (int)(offset + 1); + Found: + return (int)(offset); } else if (Vector256.IsHardwareAccelerated && length >= Vector256.Count) { @@ -1772,21 +1806,21 @@ private static int IndexOfAnyValueType(ref TValue searchSpace, ref TValue current = ref Unsafe.Add(ref searchSpace, offset); lookUp = current; - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found; lookUp = Unsafe.Add(ref current, 1); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset + 1; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found1; lookUp = Unsafe.Add(ref current, 2); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset + 2; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found2; lookUp = Unsafe.Add(ref current, 3); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset + 3; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found3; lookUp = Unsafe.Add(ref current, 4); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset + 4; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found4; lookUp = Unsafe.Add(ref current, 5); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset + 5; + if 
(TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found5; lookUp = Unsafe.Add(ref current, 6); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset + 6; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found6; lookUp = Unsafe.Add(ref current, 7); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset + 7; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found7; offset += 8; } @@ -1798,13 +1832,13 @@ private static int IndexOfAnyValueType(ref TValue searchSpace, ref TValue current = ref Unsafe.Add(ref searchSpace, offset); lookUp = current; - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found; lookUp = Unsafe.Add(ref current, 1); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset + 1; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found1; lookUp = Unsafe.Add(ref current, 2); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset + 2; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found2; lookUp = Unsafe.Add(ref current, 3); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset + 3; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found3; offset += 4; } @@ -1814,10 +1848,27 @@ private static int IndexOfAnyValueType(ref TValue searchSpace, length -= 1; lookUp = Unsafe.Add(ref searchSpace, offset); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) 
return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found; offset += 1; } + return -1; + Found7: + return (int)(offset + 7); + Found6: + return (int)(offset + 6); + Found5: + return (int)(offset + 5); + Found4: + return (int)(offset + 4); + Found3: + return (int)(offset + 3); + Found2: + return (int)(offset + 2); + Found1: + return (int)(offset + 1); + Found: + return (int)(offset); } else if (Vector256.IsHardwareAccelerated && length >= Vector256.Count) { @@ -1914,13 +1965,13 @@ private static int IndexOfAnyValueType(ref TValue searchSpace, ref TValue current = ref Unsafe.Add(ref searchSpace, offset); lookUp = current; - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) goto Found; lookUp = Unsafe.Add(ref current, 1); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) return (int)offset + 1; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) goto Found1; lookUp = Unsafe.Add(ref current, 2); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) return (int)offset + 2; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) goto Found2; lookUp = Unsafe.Add(ref current, 3); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) return (int)offset + 3; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) goto Found3; offset += 4; } @@ -1930,10 +1981,19 @@ private static int IndexOfAnyValueType(ref TValue searchSpace, length -= 1; lookUp = Unsafe.Add(ref searchSpace, offset); - if 
(TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) goto Found; offset += 1; } + return -1; + Found3: + return (int)(offset + 3); + Found2: + return (int)(offset + 2); + Found1: + return (int)(offset + 1); + Found: + return (int)(offset); } else if (Vector256.IsHardwareAccelerated && length >= Vector256.Count) { @@ -2025,13 +2085,13 @@ internal static int IndexOfAnyValueType(ref T searchSpace, T value0, T value1 ref T current = ref Unsafe.Add(ref searchSpace, offset); lookUp = current; - if (lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3 || lookUp == value4) return (int)offset; + if (lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3 || lookUp == value4) goto Found; lookUp = Unsafe.Add(ref current, 1); - if (lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3 || lookUp == value4) return (int)offset + 1; + if (lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3 || lookUp == value4) goto Found1; lookUp = Unsafe.Add(ref current, 2); - if (lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3 || lookUp == value4) return (int)offset + 2; + if (lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3 || lookUp == value4) goto Found2; lookUp = Unsafe.Add(ref current, 3); - if (lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3 || lookUp == value4) return (int)offset + 3; + if (lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3 || lookUp == value4) goto Found3; offset += 4; } @@ -2041,10 +2101,20 @@ internal static int IndexOfAnyValueType(ref T searchSpace, T value0, T value1 length -= 1; lookUp = Unsafe.Add(ref searchSpace, offset); - if (lookUp == value0 || lookUp == value1 || lookUp == value2 || 
lookUp == value3 || lookUp == value4) return (int)offset; + if (lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3 || lookUp == value4) goto Found; offset += 1; } + + return -1; + Found3: + return (int)(offset + 3); + Found2: + return (int)(offset + 2); + Found1: + return (int)(offset + 1); + Found: + return (int)(offset); } else if (Vector256.IsHardwareAccelerated && length >= Vector256.Count) { @@ -2145,14 +2215,14 @@ private static int LastIndexOfValueType(ref TValue searchSpace length -= 8; ref TValue current = ref Unsafe.Add(ref searchSpace, offset); - if (TNegator.NegateIfNeeded(current == value)) return (int)offset; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, -1) == value)) return (int)offset - 1; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, -2) == value)) return (int)offset - 2; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, -3) == value)) return (int)offset - 3; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, -4) == value)) return (int)offset - 4; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, -5) == value)) return (int)offset - 5; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, -6) == value)) return (int)offset - 6; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, -7) == value)) return (int)offset - 7; + if (TNegator.NegateIfNeeded(current == value)) goto Found; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, -1) == value)) goto FoundM1; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, -2) == value)) goto FoundM2; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, -3) == value)) goto FoundM3; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, -4) == value)) goto FoundM4; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, -5) == value)) goto FoundM5; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, -6) == value)) goto FoundM6; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, -7) == value)) goto FoundM7; offset -= 8; } @@ -2162,10 +2232,10 @@ private 
static int LastIndexOfValueType(ref TValue searchSpace length -= 4; ref TValue current = ref Unsafe.Add(ref searchSpace, offset); - if (TNegator.NegateIfNeeded(current == value)) return (int)offset; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, -1) == value)) return (int)offset - 1; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, -2) == value)) return (int)offset - 2; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, -3) == value)) return (int)offset - 3; + if (TNegator.NegateIfNeeded(current == value)) goto Found; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, -1) == value)) goto FoundM1; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, -2) == value)) goto FoundM2; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref current, -3) == value)) goto FoundM3; offset -= 4; } @@ -2174,10 +2244,27 @@ private static int LastIndexOfValueType(ref TValue searchSpace { length -= 1; - if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset) == value)) return (int)offset; + if (TNegator.NegateIfNeeded(Unsafe.Add(ref searchSpace, offset) == value)) goto Found; offset -= 1; } + return -1; + FoundM7: + return (int)(offset - 7); + FoundM6: + return (int)(offset - 6); + FoundM5: + return (int)(offset - 5); + FoundM4: + return (int)(offset - 4); + FoundM3: + return (int)(offset - 3); + FoundM2: + return (int)(offset - 2); + FoundM1: + return (int)(offset - 1); + Found: + return (int)(offset); } else if (Vector256.IsHardwareAccelerated && length >= Vector256.Count) { @@ -2269,21 +2356,21 @@ private static int LastIndexOfAnyValueType(ref TValue searchSp ref TValue current = ref Unsafe.Add(ref searchSpace, offset); lookUp = current; - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found; lookUp = Unsafe.Add(ref current, -1); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset - 1; + if 
(TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto FoundM1; lookUp = Unsafe.Add(ref current, -2); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset - 2; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto FoundM2; lookUp = Unsafe.Add(ref current, -3); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset - 3; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto FoundM3; lookUp = Unsafe.Add(ref current, -4); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset - 4; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto FoundM4; lookUp = Unsafe.Add(ref current, -5); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset - 5; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto FoundM5; lookUp = Unsafe.Add(ref current, -6); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset - 6; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto FoundM6; lookUp = Unsafe.Add(ref current, -7); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset - 7; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto FoundM7; offset -= 8; } @@ -2295,13 +2382,13 @@ private static int LastIndexOfAnyValueType(ref TValue searchSp ref TValue current = ref Unsafe.Add(ref searchSpace, offset); lookUp = current; - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found; lookUp = Unsafe.Add(ref current, -1); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset - 1; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto FoundM1; lookUp = Unsafe.Add(ref current, -2); - if 
(TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset - 2; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto FoundM2; lookUp = Unsafe.Add(ref current, -3); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset - 3; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto FoundM3; offset -= 4; } @@ -2311,10 +2398,27 @@ private static int LastIndexOfAnyValueType(ref TValue searchSp length -= 1; lookUp = Unsafe.Add(ref searchSpace, offset); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1)) goto Found; offset -= 1; } + return -1; + FoundM7: + return (int)(offset - 7); + FoundM6: + return (int)(offset - 6); + FoundM5: + return (int)(offset - 5); + FoundM4: + return (int)(offset - 4); + FoundM3: + return (int)(offset - 3); + FoundM2: + return (int)(offset - 2); + FoundM1: + return (int)(offset - 1); + Found: + return (int)(offset); } else if (Vector256.IsHardwareAccelerated && length >= Vector256.Count) { @@ -2408,21 +2512,21 @@ private static int LastIndexOfAnyValueType(ref TValue searchSp ref TValue current = ref Unsafe.Add(ref searchSpace, offset); lookUp = current; - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found; lookUp = Unsafe.Add(ref current, -1); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset - 1; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto FoundM1; lookUp = Unsafe.Add(ref current, -2); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset - 2; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) 
goto FoundM2; lookUp = Unsafe.Add(ref current, -3); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset - 3; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto FoundM3; lookUp = Unsafe.Add(ref current, -4); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset - 4; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto FoundM4; lookUp = Unsafe.Add(ref current, -5); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset - 5; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto FoundM5; lookUp = Unsafe.Add(ref current, -6); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset - 6; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto FoundM6; lookUp = Unsafe.Add(ref current, -7); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset - 7; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto FoundM7; offset -= 8; } @@ -2434,13 +2538,13 @@ private static int LastIndexOfAnyValueType(ref TValue searchSp ref TValue current = ref Unsafe.Add(ref searchSpace, offset); lookUp = current; - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found; lookUp = Unsafe.Add(ref current, -1); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset - 1; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto FoundM1; lookUp = Unsafe.Add(ref current, -2); - if 
(TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset - 2; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto FoundM2; lookUp = Unsafe.Add(ref current, -3); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset - 3; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto FoundM3; offset -= 4; } @@ -2450,10 +2554,27 @@ private static int LastIndexOfAnyValueType(ref TValue searchSp length -= 1; lookUp = Unsafe.Add(ref searchSpace, offset); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2)) goto Found; offset -= 1; } + return -1; + FoundM7: + return (int)(offset - 7); + FoundM6: + return (int)(offset - 6); + FoundM5: + return (int)(offset - 5); + FoundM4: + return (int)(offset - 4); + FoundM3: + return (int)(offset - 3); + FoundM2: + return (int)(offset - 2); + FoundM1: + return (int)(offset - 1); + Found: + return (int)(offset); } else if (Vector256.IsHardwareAccelerated && length >= Vector256.Count) { @@ -2546,13 +2667,13 @@ private static int LastIndexOfAnyValueType(ref TValue searchSp ref TValue current = ref Unsafe.Add(ref searchSpace, offset); lookUp = current; - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) goto Found; lookUp = Unsafe.Add(ref current, -1); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) return (int)offset - 1; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) goto FoundM1; lookUp = Unsafe.Add(ref current, -2); - if 
(TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) return (int)offset - 2; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) goto FoundM2; lookUp = Unsafe.Add(ref current, -3); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) return (int)offset - 3; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) goto FoundM3; offset -= 4; } @@ -2562,10 +2683,19 @@ private static int LastIndexOfAnyValueType(ref TValue searchSp length -= 1; lookUp = Unsafe.Add(ref searchSpace, offset); - if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) return (int)offset; + if (TNegator.NegateIfNeeded(lookUp == value0 || lookUp == value1 || lookUp == value2 || lookUp == value3)) goto Found; offset -= 1; } + return -1; + FoundM3: + return (int)(offset - 3); + FoundM2: + return (int)(offset - 2); + FoundM1: + return (int)(offset - 1); + Found: + return (int)(offset); } else if (Vector256.IsHardwareAccelerated && length >= Vector256.Count) { From fe06a726462b2944300a287767f59135221419f4 Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Tue, 1 Nov 2022 19:57:17 +0200 Subject: [PATCH 04/12] [mono][interp] Replace compare + brfalse/brtrue with single conditional branch --- src/mono/mono/mini/interp/transform.c | 29 ++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index 89a611b1713764..48880f444fae32 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -9386,6 +9386,34 @@ interp_super_instructions (TransformData *td) } } } else if (MINT_IS_UNOP_CONDITIONAL_BRANCH (opcode) && is_short_offset (noe, ins->info.target_bb->native_offset_estimate)) { + if (opcode == 
MINT_BRFALSE_I4 || opcode == MINT_BRTRUE_I4) { + gboolean negate = opcode == MINT_BRFALSE_I4; + int cond_sreg = ins->sregs [0]; + InterpInst *def = td->locals [cond_sreg].def; + if (def != NULL) { + int replace_opcode = -1; + switch (def->opcode) { + case MINT_CEQ_I4: replace_opcode = negate ? MINT_BNE_UN_I4 : MINT_BEQ_I4; break; + case MINT_CEQ_I8: replace_opcode = negate ? MINT_BNE_UN_I8 : MINT_BEQ_I8; break; + // Add more opcodes + default: + break; + } + if (replace_opcode != -1) { + ins->opcode = replace_opcode; + ins->sregs [0] = def->sregs [0]; + ins->sregs [1] = def->sregs [1]; + interp_clear_ins (def); + if (td->verbose_level) { + g_print ("superins: "); + dump_interp_inst (ins); + } + // The newly added opcode could be part of further superinstructions. Retry + ins = ins->prev; + continue; + } + } + } InterpInst *prev_ins = interp_prev_ins (ins); if (prev_ins && prev_ins->opcode == MINT_SAFEPOINT) { int condbr_op = get_unop_condbr_sp (opcode); @@ -9397,7 +9425,6 @@ interp_super_instructions (TransformData *td) dump_interp_inst (ins); } } - } } noe += get_inst_length (ins); From ff36071f180d77d64b7df6a937c3d85e3039f90c Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Tue, 1 Nov 2022 22:16:40 +0200 Subject: [PATCH 05/12] [mono][interp] Dump in/out links for bblocks during verbose logging --- src/mono/mono/mini/interp/transform.c | 33 +++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index 48880f444fae32..683fe031cb6b9e 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -1603,6 +1603,32 @@ dump_interp_inst (InterpInst *ins) g_string_free (str, TRUE); } +static GString* +get_interp_bb_links (InterpBasicBlock *bb) +{ + GString *str = g_string_new (""); + + if (bb->in_count) { + g_string_append_printf (str, "IN (%d", bb->in_bb [0]->index); + for (int i = 1; i < bb->in_count; i++) + 
g_string_append_printf (str, " %d", bb->in_bb [i]->index); + g_string_append_printf (str, "), "); + } else { + g_string_append_printf (str, "IN (nil), "); + } + + if (bb->out_count) { + g_string_append_printf (str, "OUT (%d", bb->out_bb [0]->index); + for (int i = 1; i < bb->out_count; i++) + g_string_append_printf (str, " %d", bb->out_bb [i]->index); + g_string_append_printf (str, ")"); + } else { + g_string_append_printf (str, "OUT (nil)"); + } + + return str; +} + static void dump_interp_bb (InterpBasicBlock *bb) { @@ -8659,8 +8685,11 @@ interp_cprop (TransformData *td) for (ins = bb->first_ins; ins != NULL; ins = ins->next) foreach_local_var (td, ins, local_defs, clear_local_defs); - if (td->verbose_level) - g_print ("BB%d\n", bb->index); + if (td->verbose_level) { + GString* bb_info = get_interp_bb_links (bb); + g_print ("\nBB%d: %s\n", bb->index, bb_info->str); + g_string_free (bb_info, TRUE); + } for (ins = bb->first_ins; ins != NULL; ins = ins->next) { int opcode = ins->opcode; From fb2bcde748b7622fdf06ecb43d842673f5054730 Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Tue, 1 Nov 2022 23:07:29 +0200 Subject: [PATCH 06/12] [mono][interp] Improve detection of dead bblocks Before we were marking bblocks as dead if they had their in_count 0. This is not enough however, since it doesn't account for loops. We now do a full traversal of the bblock graph to detect unreachable bblocks. 
--- src/mono/mono/mini/interp/transform.c | 43 +++++++++++++++++++++++++-- src/mono/mono/mini/interp/transform.h | 1 + 2 files changed, 42 insertions(+), 2 deletions(-) diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index 683fe031cb6b9e..f07d76b4bac40d 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -662,7 +662,6 @@ interp_remove_bblock (TransformData *td, InterpBasicBlock *bb, InterpBasicBlock { gboolean needs_cprop = FALSE; - g_assert (!bb->in_count); for (InterpInst *ins = bb->first_ins; ins != NULL; ins = ins->next) { if (ins->opcode == MINT_LDLOCA_S) { td->locals [ins->sregs [0]].indirects--; @@ -672,6 +671,8 @@ interp_remove_bblock (TransformData *td, InterpBasicBlock *bb, InterpBasicBlock } } } + while (bb->in_count) + interp_unlink_bblocks (bb->in_bb [0], bb); while (bb->out_count) interp_unlink_bblocks (bb, bb->out_bb [0]); prev_bb->next_bb = bb->next_bb; @@ -8185,6 +8186,42 @@ generate_compacted_code (TransformData *td) g_ptr_array_free (td->relocs, TRUE); } +static void +interp_mark_reachable_bblocks (TransformData *td) +{ + InterpBasicBlock **queue = mono_mem_manager_alloc0 (td->mem_manager, td->bb_count * sizeof (InterpBasicBlock*)); + InterpBasicBlock *current; + int cur_index = 0; + int next_position = 0; + + // FIXME There is no need to force eh bblocks to remain alive + current = td->entry_bb; + while (current != NULL) { + if (current->eh_block) { + queue [next_position++] = current; + current->reachable = TRUE; + } else { + current->reachable = FALSE; + } + current = current->next_bb; + } + + queue [next_position++] = td->entry_bb; + td->entry_bb->reachable = TRUE; + + // We have the roots, traverse everything else + while (cur_index < next_position) { + current = queue [cur_index++]; + for (int i = 0; i < current->out_count; i++) { + InterpBasicBlock *child = current->out_bb [i]; + if (!child->reachable) { + queue [next_position++] = child; + 
child->reachable = TRUE; + } + } + } +} + // Traverse the list of basic blocks and merge adjacent blocks static gboolean interp_optimize_bblocks (TransformData *td) @@ -8192,11 +8229,13 @@ interp_optimize_bblocks (TransformData *td) InterpBasicBlock *bb = td->entry_bb; gboolean needs_cprop = FALSE; + interp_mark_reachable_bblocks (td); + while (TRUE) { InterpBasicBlock *next_bb = bb->next_bb; if (!next_bb) break; - if (next_bb->in_count == 0 && !next_bb->eh_block) { + if (!next_bb->reachable) { if (td->verbose_level) g_print ("Removed BB%d\n", next_bb->index); needs_cprop |= interp_remove_bblock (td, next_bb, bb); diff --git a/src/mono/mono/mini/interp/transform.h b/src/mono/mono/mini/interp/transform.h index 7ad00ab0b2618d..70d80e21c60936 100644 --- a/src/mono/mono/mini/interp/transform.h +++ b/src/mono/mono/mini/interp/transform.h @@ -125,6 +125,7 @@ struct _InterpBasicBlock { SeqPoint **pred_seq_points; guint num_pred_seq_points; + int reachable : 1; // This block has special semantics and it shouldn't be optimized away int eh_block : 1; int dead: 1; From 5379908ff9bcac8cc4b8ae5fb22a9684b81d280c Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Thu, 3 Nov 2022 11:26:55 +0200 Subject: [PATCH 07/12] [mono][interp] Reorder bblocks to facilitate propagation of values Consider for example the following pattern used commonly with conditional branches: ``` br.s [nil <- nil], BB0 ... ceq0.i4 [32 <- 40], br.s [nil <- nil], BB1 BB0: ldc.i4.0 [32 <- nil], BB1: brfalse.i4.s [nil <- 32], BB_EXIT BB2: ldstr [56 <- nil], 2 ``` This commit reorders this code to look like: ``` br.s [nil <- nil], BB0 ... ceq0.i4 [32 <- 40], brfalse.i4.s [nil <- 32], BB_EXIT br.s [nil <- nil], BB2 BB0 ldc.i4.0 [32 <- nil], BB1: brfalse.i4.s [nil <- 32], BB_EXIT BB2: ldstr [56 <- nil], 2 ``` This means we will have duplicated brfalse instructions, but every basic block reaching the conditional branch will have information about the condition. 
For example ceq0.i4 + brfalse is equivalent to brtrue, ldc.i4.0 + brfalse is equivalent to unconditional branch. After other future optimizations applied on the bblocks graph, like removal, merging and propagation of target, the resulting code in this example would look like: ``` br.s [nil <- nil], BB_EXIT ... brtrue.i4.s [nil <- 40], BB_EXIT BB2: ldstr [56 <- nil], 2 ``` Which is a great simplification over the original code. --- src/mono/mono/mini/interp/transform.c | 110 ++++++++++++++++++++++++-- 1 file changed, 104 insertions(+), 6 deletions(-) diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index f07d76b4bac40d..8053c84fcef6cf 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -276,15 +276,41 @@ interp_clear_ins (InterpInst *ins) ins->opcode = MINT_NOP; } +static gboolean +interp_ins_is_nop (InterpInst *ins) +{ + return ins->opcode == MINT_NOP || ins->opcode == MINT_IL_SEQ_POINT; +} + static InterpInst* interp_prev_ins (InterpInst *ins) { ins = ins->prev; - while (ins && (ins->opcode == MINT_NOP || ins->opcode == MINT_IL_SEQ_POINT)) + while (ins && interp_ins_is_nop (ins)) ins = ins->prev; return ins; } +static InterpInst* +interp_first_ins (InterpBasicBlock *bb) +{ + InterpInst *ins = bb->first_ins; + if (!ins || !interp_ins_is_nop (ins)) + return ins; + while (ins && interp_ins_is_nop (ins)) + ins = ins->next; + return ins; +} + +static InterpInst* +interp_last_ins (InterpBasicBlock *bb) +{ + InterpInst *ins = bb->last_ins; + if (!ins || !interp_ins_is_nop (ins)) + return ins; + return interp_prev_ins (ins); +} + #define CHECK_STACK(td, n) \ do { \ guint stack_size = GPTRDIFF_TO_UINT ((td)->sp - (td)->stack); \ @@ -3589,6 +3615,18 @@ interp_field_from_token (MonoMethod *method, guint32 token, MonoClass **klass, M return field; } +static InterpBasicBlock* +alloc_bb (TransformData *td) +{ + InterpBasicBlock *bb = (InterpBasicBlock*)mono_mempool_alloc0 (td->mempool, 
sizeof (InterpBasicBlock)); + bb->il_offset = -1; + bb->native_offset = -1; + bb->stack_height = -1; + bb->index = td->bb_count++; + + return bb; +} + static InterpBasicBlock* get_bb (TransformData *td, unsigned char *ip, gboolean make_list) { @@ -3596,13 +3634,10 @@ get_bb (TransformData *td, unsigned char *ip, gboolean make_list) InterpBasicBlock *bb = td->offset_to_bb [offset]; if (!bb) { - bb = (InterpBasicBlock*)mono_mempool_alloc0 (td->mempool, sizeof (InterpBasicBlock)); + bb = alloc_bb (td); + bb->il_offset = offset; - bb->native_offset = -1; - bb->stack_height = -1; - bb->index = td->bb_count++; td->offset_to_bb [offset] = bb; - /* Add the blocks in reverse order */ if (make_list) td->basic_blocks = g_list_prepend_mempool (td->mempool, td->basic_blocks, bb); @@ -8222,6 +8257,67 @@ interp_mark_reachable_bblocks (TransformData *td) } } +static void +interp_reorder_bblocks (TransformData *td) +{ + InterpBasicBlock *bb; + + for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) { + InterpInst *first = interp_first_ins (bb); + if (first && MINT_IS_CONDITIONAL_BRANCH (first->opcode)) { + // This means this bblock has a single instruction, the conditional branch + int i = 0; + while (i < bb->in_count) { + InterpBasicBlock *in_bb = bb->in_bb [i]; + InterpInst *last_ins = interp_last_ins (in_bb); + if (last_ins && last_ins->opcode == MINT_BR) { + // This bblock is reached unconditionally from one of its parents + // Move the conditional branch inside the parent to facilitate propagation + // of condition value. 
+ InterpBasicBlock *cond_true_bb = first->info.target_bb; + InterpBasicBlock *next_bb = bb->next_bb; + + // parent bb will do the conditional branch + interp_unlink_bblocks (in_bb, bb); + last_ins->opcode = first->opcode; + last_ins->sregs [0] = first->sregs [0]; + last_ins->sregs [1] = first->sregs [1]; + last_ins->info.target_bb = cond_true_bb; + interp_link_bblocks (td, in_bb, cond_true_bb); + + // Create new fallthrough bb between in_bb and in_bb->next_bb + InterpBasicBlock *new_bb = alloc_bb (td); + new_bb->next_bb = in_bb->next_bb; + in_bb->next_bb = new_bb; + interp_link_bblocks (td, in_bb, new_bb); + + + InterpInst *new_inst = interp_insert_ins_bb (td, new_bb, NULL, MINT_BR); + new_inst->info.target_bb = next_bb; + + interp_link_bblocks (td, new_bb, next_bb); + if (td->verbose_level) { + GString* bb_info = get_interp_bb_links (bb); + GString* in_bb_info = get_interp_bb_links (in_bb); + GString* new_bb_info = get_interp_bb_links (new_bb); + g_print ("Moved cond branch BB%d into BB%d, new BB%d\n", bb->index, in_bb->index, new_bb->index); + g_print ("\tBB%d: %s\n", bb->index, bb_info->str); + g_print ("\tBB%d: %s\n", in_bb->index, in_bb_info->str); + g_print ("\tBB%d: %s\n", new_bb->index, new_bb_info->str); + g_string_free (bb_info, TRUE); + g_string_free (in_bb_info, TRUE); + g_string_free (new_bb_info, TRUE); + } + // Since we changed links, in_bb might have changed, loop again from the start + i = 0; + } else { + i++; + } + } + } + } +} + // Traverse the list of basic blocks and merge adjacent blocks static gboolean interp_optimize_bblocks (TransformData *td) @@ -8229,6 +8325,8 @@ interp_optimize_bblocks (TransformData *td) InterpBasicBlock *bb = td->entry_bb; gboolean needs_cprop = FALSE; + interp_reorder_bblocks (td); + interp_mark_reachable_bblocks (td); while (TRUE) { From c8edcde9e7af3ce45cbca400b293e8ccf407fab0 Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Tue, 15 Nov 2022 17:22:33 +0200 Subject: [PATCH 08/12] [mono][interp] Don't optimize out 
bblocks that are tiering patchpoint targets Even though they can become unreachable in the current method, they can still be called when the unoptimized method gets tiered at this point. Add assert to prevent such issues in the future. --- src/mono/mono/mini/interp/tiering.c | 4 +++- src/mono/mono/mini/interp/transform.c | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/mono/mono/mini/interp/tiering.c b/src/mono/mono/mini/interp/tiering.c index 7830c66c4c1110..1aa02dd341efeb 100644 --- a/src/mono/mono/mini/interp/tiering.c +++ b/src/mono/mono/mini/interp/tiering.c @@ -209,5 +209,7 @@ mono_interp_tier_up_frame_patchpoint (InterpFrame *frame, ThreadContext *context } context->stack_pointer = (guchar*)frame->stack + optimized_method->alloca_size; frame->imethod = optimized_method; - return optimized_method->code + lookup_patchpoint_data (optimized_method, bb_index); + int offset = lookup_patchpoint_data (optimized_method, bb_index); + g_assert (offset != G_MAXINT32); + return optimized_method->code + offset; } diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index 8053c84fcef6cf..69e8cbc23fb1be 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -8232,7 +8232,7 @@ interp_mark_reachable_bblocks (TransformData *td) // FIXME There is no need to force eh bblocks to remain alive current = td->entry_bb; while (current != NULL) { - if (current->eh_block) { + if (current->eh_block || current->patchpoint_data) { queue [next_position++] = current; current->reachable = TRUE; } else { @@ -8338,7 +8338,7 @@ interp_optimize_bblocks (TransformData *td) g_print ("Removed BB%d\n", next_bb->index); needs_cprop |= interp_remove_bblock (td, next_bb, bb); continue; - } else if (bb->out_count == 1 && bb->out_bb [0] == next_bb && next_bb->in_count == 1 && 
!next_bb->eh_block && !next_bb->patchpoint_data) { g_assert (next_bb->in_bb [0] == bb); interp_merge_bblocks (td, bb, next_bb); if (td->verbose_level) From cffdc0235e9cb74ec5bbbb1e62d8b437709d26fd Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Tue, 15 Nov 2022 17:32:45 +0200 Subject: [PATCH 09/12] [mono][interp] Make bblock reordering more conservative If we are unlikely to gain anything from propagating the condition (if we don't have information about any of the condition operand vars), simply avoid the optimization. --- src/mono/mono/mini/interp/transform.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index 69e8cbc23fb1be..c08a83407f94a4 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -8257,6 +8257,20 @@ interp_mark_reachable_bblocks (TransformData *td) } } +static gboolean +interp_prev_ins_defines_var (InterpInst *ins, int var1, int var2) +{ + // Check max of 5 instructions + for (int i = 0; i < 5; i++) { + ins = interp_prev_ins (ins); + if (!ins) + return FALSE; + if (mono_interp_op_dregs [ins->opcode] && (ins->dreg == var1 || ins->dreg == var2)) + return TRUE; + } + return FALSE; +} + static void interp_reorder_bblocks (TransformData *td) { @@ -8267,10 +8281,11 @@ interp_reorder_bblocks (TransformData *td) if (first && MINT_IS_CONDITIONAL_BRANCH (first->opcode)) { // This means this bblock has a single instruction, the conditional branch int i = 0; + int lookup_var2 = (mono_interp_op_dregs [first->opcode] > 1) ? 
first->sregs [1] : -1; while (i < bb->in_count) { InterpBasicBlock *in_bb = bb->in_bb [i]; InterpInst *last_ins = interp_last_ins (in_bb); - if (last_ins && last_ins->opcode == MINT_BR) { + if (last_ins && last_ins->opcode == MINT_BR && interp_prev_ins_defines_var (last_ins, first->sregs [0], lookup_var2)) { // This bblock is reached unconditionally from one of its parents // Move the conditional branch inside the parent to facilitate propagation // of condition value. From 5e8571f46e8f46bf8429c6e62b41e5475c354db5 Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Thu, 3 Nov 2022 20:31:52 +0200 Subject: [PATCH 10/12] [mono][interp] Add basic removal of unused defines If we store in a var and this var is not used and redefined by the end of the basic block, then we can clear the original store. --- src/mono/mono/mini/interp/mintops.h | 3 +++ src/mono/mono/mini/interp/transform.c | 19 +++++++++++++++---- src/mono/mono/mini/interp/transform.h | 2 ++ 3 files changed, 20 insertions(+), 4 deletions(-) diff --git a/src/mono/mono/mini/interp/mintops.h b/src/mono/mono/mini/interp/mintops.h index f4f2f83aa80579..41a653477d3fc1 100644 --- a/src/mono/mono/mini/interp/mintops.h +++ b/src/mono/mono/mini/interp/mintops.h @@ -80,6 +80,9 @@ typedef enum { #define MINT_IS_STIND_INT(op) ((op) >= MINT_STIND_I1 && (op) <= MINT_STIND_I8) #define MINT_IS_STIND(op) ((op) >= MINT_STIND_I1 && (op) <= MINT_STIND_REF) +// TODO Add more +#define MINT_NO_SIDE_EFFECTS(op) (MINT_IS_MOV (op) || MINT_IS_LDC_I4 (op) || MINT_IS_LDC_I8 (op) || op == MINT_MONO_LDPTR) + #define MINT_CALL_ARGS 2 #define MINT_CALL_ARGS_SREG -2 diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index c08a83407f94a4..d1fb45d01cc557 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -8392,10 +8392,7 @@ interp_local_deadce (TransformData *td) // Kill instructions that don't use stack and are storing into dead locals for (InterpBasicBlock *bb 
= td->entry_bb; bb != NULL; bb = bb->next_bb) { for (InterpInst *ins = bb->first_ins; ins != NULL; ins = ins->next) { - if (MINT_IS_MOV (ins->opcode) || - MINT_IS_LDC_I4 (ins->opcode) || - MINT_IS_LDC_I8 (ins->opcode) || - ins->opcode == MINT_MONO_LDPTR || + if (MINT_NO_SIDE_EFFECTS (ins->opcode) || ins->opcode == MINT_LDLOCA_S) { int dreg = ins->dreg; if (td->locals [dreg].flags & INTERP_LOCAL_FLAG_DEAD) { @@ -8784,6 +8781,7 @@ cprop_sreg (TransformData *td, InterpInst *ins, int *psreg, LocalValue *local_de int sreg = *psreg; local_ref_count [sreg]++; + local_defs [sreg].ref_count++; if (local_defs [sreg].type == LOCAL_VALUE_LOCAL) { int cprop_local = local_defs [sreg].local; @@ -8808,6 +8806,7 @@ clear_local_defs (TransformData *td, int var, void *data) LocalValue *local_defs = (LocalValue*) data; local_defs [var].type = LOCAL_VALUE_NONE; local_defs [var].ins = NULL; + local_defs [var].ref_count = 0; } static void @@ -8877,6 +8876,18 @@ interp_cprop (TransformData *td) } if (num_dregs) { + // Check if the previous definition of this var was used at all. + // If it wasn't we can just clear the instruction + if (local_defs [dreg].ins != NULL && + local_defs [dreg].ref_count == 0 && + !td->locals [dreg].indirects) { + InterpInst *prev_def = local_defs [dreg].ins; + if (MINT_NO_SIDE_EFFECTS (prev_def->opcode)) { + for (int i = 0; i < mono_interp_op_sregs [prev_def->opcode]; i++) + local_ref_count [prev_def->sregs [i]]--; + interp_clear_ins (prev_def); + } + } local_defs [dreg].type = LOCAL_VALUE_NONE; local_defs [dreg].ins = ins; local_defs [dreg].def_index = ins_index; diff --git a/src/mono/mono/mini/interp/transform.h b/src/mono/mono/mini/interp/transform.h index 70d80e21c60936..b9e0a20c964d32 100644 --- a/src/mono/mono/mini/interp/transform.h +++ b/src/mono/mono/mini/interp/transform.h @@ -59,6 +59,8 @@ typedef struct { // The instruction that writes this local. 
InterpInst *ins; int def_index; + // ref count for ins->dreg + int ref_count; } LocalValue; struct _InterpInst { From e4abb67e17622c6a2aece2b51fb69aaf893c3609 Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Thu, 3 Nov 2022 21:40:32 +0200 Subject: [PATCH 11/12] [mono][interp] Clear unused defines of local only vars We detect if a var's value never escapes the definition of a bblock. We mark such vars and clear unused definitions of that var from other bblocks. --- src/mono/mono/mini/interp/transform.c | 54 ++++++++++++++++++++++++--- src/mono/mono/mini/interp/transform.h | 3 ++ 2 files changed, 52 insertions(+), 5 deletions(-) diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index d1fb45d01cc557..d76745cf1cbe47 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -8377,17 +8377,26 @@ interp_local_deadce (TransformData *td) for (unsigned int i = 0; i < td->locals_size; i++) { g_assert (local_ref_count [i] >= 0); g_assert (td->locals [i].indirects >= 0); - if (!local_ref_count [i] && - !td->locals [i].indirects && - (td->locals [i].flags & INTERP_LOCAL_FLAG_DEAD) == 0) { + if (td->locals [i].indirects || (td->locals [i].flags & INTERP_LOCAL_FLAG_DEAD)) + continue; + if (!local_ref_count [i]) { needs_dce = TRUE; td->locals [i].flags |= INTERP_LOCAL_FLAG_DEAD; + } else if (!(td->locals [i].flags & INTERP_LOCAL_FLAG_UNKNOWN_USE)) { + if (!(td->locals [i].flags & INTERP_LOCAL_FLAG_LOCAL_ONLY)) { + // The value of this var is not passed between multiple basic blocks + td->locals [i].flags |= INTERP_LOCAL_FLAG_LOCAL_ONLY; + if (td->verbose_level) + g_print ("Var %d is local only\n", i); + needs_cprop = TRUE; + } } + td->locals [i].flags &= ~INTERP_LOCAL_FLAG_UNKNOWN_USE; } // Return early if all locals are alive if (!needs_dce) - return FALSE; + return needs_cprop; // Kill instructions that don't use stack and are storing into dead locals for (InterpBasicBlock *bb = td->entry_bb; bb != 
NULL; bb = bb->next_bb) { @@ -8797,6 +8806,8 @@ cprop_sreg (TransformData *td, InterpInst *ins, int *psreg, LocalValue *local_de local_ref_count [cprop_local]++; if (td->verbose_level) dump_interp_inst (ins); + } else if (!local_defs [sreg].ins) { + td->locals [sreg].flags |= INTERP_LOCAL_FLAG_UNKNOWN_USE; } } @@ -8809,6 +8820,34 @@ clear_local_defs (TransformData *td, int var, void *data) local_defs [var].ref_count = 0; } +static void +clear_unused_defs (TransformData *td, int var, void *data) +{ + if (!(td->locals [var].flags & INTERP_LOCAL_FLAG_LOCAL_ONLY)) + return; + if (td->locals [var].indirects) + return; + + LocalValue *local_def = &((LocalValue*) data) [var]; + InterpInst *def_ins = local_def->ins; + if (!def_ins) + return; + if (local_def->ref_count) + return; + + // This is a local only var that is defined in this bblock and its value is not used + // at all in this bblock. Clear the definition + if (MINT_NO_SIDE_EFFECTS (def_ins->opcode)) { + for (int i = 0; i < mono_interp_op_sregs [def_ins->opcode]; i++) + td->local_ref_count [def_ins->sregs [i]]--; + if (td->verbose_level) { + g_print ("kill unused local def:\n\t"); + dump_interp_inst (def_ins); + } + interp_clear_ins (def_ins); + } +} + static void interp_cprop (TransformData *td) { @@ -8817,6 +8856,7 @@ interp_cprop (TransformData *td) InterpBasicBlock *bb; gboolean needs_retry; int ins_index; + int iteration_count = 0; td->local_ref_count = local_ref_count; retry: @@ -8824,7 +8864,7 @@ interp_cprop (TransformData *td) memset (local_ref_count, 0, td->locals_size * sizeof (int)); if (td->verbose_level) - g_print ("\ncprop iteration\n"); + g_print ("\ncprop iteration %d\n", iteration_count++); for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) { InterpInst *ins; @@ -9222,8 +9262,12 @@ interp_cprop (TransformData *td) needs_retry = TRUE; } } + ins_index++; } + + for (ins = bb->first_ins; ins != NULL; ins = ins->next) + foreach_local_var (td, ins, local_defs, clear_unused_defs); } needs_retry |= 
interp_local_deadce (td); diff --git a/src/mono/mono/mini/interp/transform.h b/src/mono/mono/mini/interp/transform.h index b9e0a20c964d32..fa66454455282c 100644 --- a/src/mono/mono/mini/interp/transform.h +++ b/src/mono/mono/mini/interp/transform.h @@ -21,6 +21,9 @@ #define INTERP_LOCAL_FLAG_GLOBAL 8 #define INTERP_LOCAL_FLAG_NO_CALL_ARGS 16 +#define INTERP_LOCAL_FLAG_UNKNOWN_USE 32 +#define INTERP_LOCAL_FLAG_LOCAL_ONLY 64 + typedef struct _InterpInst InterpInst; typedef struct _InterpBasicBlock InterpBasicBlock; From 954483aab3ab6dbde1875638dc7880626ae4e433 Mon Sep 17 00:00:00 2001 From: Vlad Brezae Date: Thu, 3 Nov 2022 22:22:47 +0200 Subject: [PATCH 12/12] [mono][interp] Propagate target branches If a bblock contains only an unconditional br, then all bblocks branching into it can just call the target directly instead. --- src/mono/mono/mini/interp/transform.c | 34 ++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/src/mono/mono/mini/interp/transform.c b/src/mono/mono/mini/interp/transform.c index d76745cf1cbe47..49085ae02db9a3 100644 --- a/src/mono/mono/mini/interp/transform.c +++ b/src/mono/mono/mini/interp/transform.c @@ -8278,7 +8278,9 @@ interp_reorder_bblocks (TransformData *td) for (bb = td->entry_bb; bb != NULL; bb = bb->next_bb) { InterpInst *first = interp_first_ins (bb); - if (first && MINT_IS_CONDITIONAL_BRANCH (first->opcode)) { + if (!first) + continue; + if (MINT_IS_CONDITIONAL_BRANCH (first->opcode)) { // This means this bblock has a single instruction, the conditional branch int i = 0; int lookup_var2 = (mono_interp_op_dregs [first->opcode] > 1) ? 
first->sregs [1] : -1; @@ -8329,6 +8331,36 @@ interp_reorder_bblocks (TransformData *td) i++; } } + } else if (first->opcode == MINT_BR) { + // All bblocks jumping into this bblock can jump directly into the br target + int i = 0; + while (i < bb->in_count) { + InterpBasicBlock *in_bb = bb->in_bb [i]; + InterpInst *last_ins = interp_last_ins (in_bb); + if (last_ins && (MINT_IS_CONDITIONAL_BRANCH (last_ins->opcode) || + MINT_IS_UNCONDITIONAL_BRANCH (last_ins->opcode)) && + last_ins->info.target_bb == bb) { + InterpBasicBlock *target_bb = first->info.target_bb; + last_ins->info.target_bb = target_bb; + interp_unlink_bblocks (in_bb, bb); + interp_link_bblocks (td, in_bb, target_bb); + if (td->verbose_level) { + GString* bb_info = get_interp_bb_links (bb); + GString* in_bb_info = get_interp_bb_links (in_bb); + GString* target_bb_info = get_interp_bb_links (target_bb); + g_print ("Propagated target bb BB%d into BB%d\n", target_bb->index, in_bb->index); + g_print ("\tBB%d: %s\n", bb->index, bb_info->str); + g_print ("\tBB%d: %s\n", in_bb->index, in_bb_info->str); + g_print ("\tBB%d: %s\n", target_bb->index, target_bb_info->str); + g_string_free (bb_info, TRUE); + g_string_free (in_bb_info, TRUE); + g_string_free (target_bb_info, TRUE); + } + i = 0; + } else { + i++; + } + } } } }