diff --git a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems index b15194e5f8c370..a7b6331227f9a9 100644 --- a/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems +++ b/src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems @@ -415,6 +415,7 @@ + diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs index 4ce70d7407d2f3..4f81a9b19970c3 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/IndexOfAnyAsciiSearcher.cs @@ -67,8 +67,7 @@ internal static unsafe void ComputeBitmap(ReadOnlySpan values, out Vector2 if (value > 127) { - // The values were modified concurrent with the call to SearchValues.Create - ThrowHelper.ThrowInvalidOperationException_InvalidOperation_EnumFailedVersion(); + continue; } lookupLocal.Set(value); diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticCharSearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticCharSearchValues.cs index fc7b012821629a..5ec79a3b8911c8 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticCharSearchValues.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticCharSearchValues.cs @@ -3,7 +3,6 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; -using System.Runtime.Intrinsics; namespace System.Buffers { @@ -14,16 +13,6 @@ internal sealed class ProbabilisticCharSearchValues : SearchValues public ProbabilisticCharSearchValues(scoped ReadOnlySpan values) { - if (Vector128.IsHardwareAccelerated && values.Length < 8) - { - // ProbabilisticMap does a 
Span.Contains check to confirm potential matches. - // If we have fewer than 8 values, pad them with existing ones to make the verification faster. - Span newValues = stackalloc char[8]; - newValues.Fill(values[0]); - values.CopyTo(newValues); - values = newValues; - } - _values = new string(values); _map = new ProbabilisticMap(_values); } diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticWithAsciiCharSearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticWithAsciiCharSearchValues.cs new file mode 100644 index 00000000000000..065f2cd5e89372 --- /dev/null +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/ProbabilisticWithAsciiCharSearchValues.cs @@ -0,0 +1,213 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System.Diagnostics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Wasm; +using System.Runtime.Intrinsics.X86; + +namespace System.Buffers +{ + internal sealed class ProbabilisticWithAsciiCharSearchValues : SearchValues + where TOptimizations : struct, IndexOfAnyAsciiSearcher.IOptimizations + { + private Vector256 _asciiBitmap; + private Vector256 _inverseAsciiBitmap; + private ProbabilisticMap _map; + private readonly string _values; + + public ProbabilisticWithAsciiCharSearchValues(scoped ReadOnlySpan values) + { + Debug.Assert(IndexOfAnyAsciiSearcher.IsVectorizationSupported); + Debug.Assert(values.ContainsAnyInRange((char)0, (char)127)); + + IndexOfAnyAsciiSearcher.ComputeBitmap(values, out _asciiBitmap, out _); + _inverseAsciiBitmap = ~_asciiBitmap; + + _values = new string(values); + _map = new ProbabilisticMap(_values); + } + + internal override char[] GetValues() => _values.ToCharArray(); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal override bool 
ContainsCore(char value) => + ProbabilisticMap.Contains(ref Unsafe.As(ref _map), _values, value); + + internal override int IndexOfAny(ReadOnlySpan span) + { + int offset = 0; + + if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128.Count) + { + // We are using IndexOfAnyAsciiSearcher to search for the first ASCII character in the set, or any non-ASCII character. + // We do this by inverting the bitmap and using the opposite search function (Negate instead of DontNegate). + + // If the bitmap we're using contains a 0, we have to use 'Ssse3AndWasmHandleZeroInNeedle' when running on X86 and WASM. + // Everything else should use 'Default'. 'TOptimizations' specifies whether '_asciiBitmap' contains a 0. + // Since we're using the inverse bitmap in this case, we have to use 'Ssse3AndWasmHandleZeroInNeedle' iff we're + // running on X86/WASM and 'TOptimizations' is 'Default' (as that means that the inverse bitmap definitely has a 0). + Debug.Assert((_asciiBitmap[0] & 1) != (_inverseAsciiBitmap[0] & 1)); + + if ((Ssse3.IsSupported || PackedSimd.IsSupported) && typeof(TOptimizations) == typeof(IndexOfAnyAsciiSearcher.Default)) + { + Debug.Assert((_inverseAsciiBitmap[0] & 1) == 1, "The inverse bitmap did not contain a 0."); + + offset = IndexOfAnyAsciiSearcher.IndexOfAnyVectorized( + ref Unsafe.As(ref MemoryMarshal.GetReference(span)), + span.Length, + ref _inverseAsciiBitmap); + } + else + { + Debug.Assert(!(Ssse3.IsSupported || PackedSimd.IsSupported) || (_inverseAsciiBitmap[0] & 1) == 0, + "The inverse bitmap contained a 0, but we're not using Ssse3AndWasmHandleZeroInNeedle."); + + offset = IndexOfAnyAsciiSearcher.IndexOfAnyVectorized( + ref Unsafe.As(ref MemoryMarshal.GetReference(span)), + span.Length, + ref _inverseAsciiBitmap); + } + + // If we've reached the end of the span or stopped at an ASCII character, we've found the result. 
+ if ((uint)offset >= (uint)span.Length || char.IsAscii(span[offset])) + { + return offset; + } + + // Fall back to using the ProbabilisticMap. + span = span.Slice(offset); + } + + int index = ProbabilisticMap.IndexOfAny( + ref Unsafe.As(ref _map), + ref MemoryMarshal.GetReference(span), + span.Length, + _values); + + if (index >= 0) + { + // We found a match. Account for the number of ASCII characters we've skipped previously. + index += offset; + } + + return index; + } + + internal override int IndexOfAnyExcept(ReadOnlySpan span) + { + int offset = 0; + + if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128.Count) + { + // Do a regular IndexOfAnyExcept for the ASCII characters. The search will stop if we encounter a non-ASCII char. + offset = IndexOfAnyAsciiSearcher.IndexOfAnyVectorized( + ref Unsafe.As(ref MemoryMarshal.GetReference(span)), + span.Length, + ref _asciiBitmap); + + // If we've reached the end of the span or stopped at an ASCII character, we've found the result. + if ((uint)offset >= (uint)span.Length || char.IsAscii(span[offset])) + { + return offset; + } + + // Fall back to a simple char-by-char search. + span = span.Slice(offset); + } + + int index = ProbabilisticMap.IndexOfAnySimpleLoop( + ref MemoryMarshal.GetReference(span), + span.Length, + _values); + + if (index >= 0) + { + // We found a match. Account for the number of ASCII characters we've skipped previously. + index += offset; + } + + return index; + } + + internal override int LastIndexOfAny(ReadOnlySpan span) + { + if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128.Count) + { + // We are using IndexOfAnyAsciiSearcher to search for the last ASCII character in the set, or any non-ASCII character. + // We do this by inverting the bitmap and using the opposite search function (Negate instead of DontNegate). + + // If the bitmap we're using contains a 0, we have to use 'Ssse3AndWasmHandleZeroInNeedle' when running on X86 and WASM. 
+ // Everything else should use 'Default'. 'TOptimizations' specifies whether '_asciiBitmap' contains a 0. + // Since we're using the inverse bitmap in this case, we have to use 'Ssse3AndWasmHandleZeroInNeedle' iff we're + // running on X86/WASM and 'TOptimizations' is 'Default' (as that means that the inverse bitmap definitely has a 0). + Debug.Assert((_asciiBitmap[0] & 1) != (_inverseAsciiBitmap[0] & 1)); + + int offset; + + if ((Ssse3.IsSupported || PackedSimd.IsSupported) && typeof(TOptimizations) == typeof(IndexOfAnyAsciiSearcher.Default)) + { + Debug.Assert((_inverseAsciiBitmap[0] & 1) == 1, "The inverse bitmap did not contain a 0."); + + offset = IndexOfAnyAsciiSearcher.LastIndexOfAnyVectorized( + ref Unsafe.As(ref MemoryMarshal.GetReference(span)), + span.Length, + ref _inverseAsciiBitmap); + } + else + { + Debug.Assert(!(Ssse3.IsSupported || PackedSimd.IsSupported) || (_inverseAsciiBitmap[0] & 1) == 0, + "The inverse bitmap contained a 0, but we're not using Ssse3AndWasmHandleZeroInNeedle."); + + offset = IndexOfAnyAsciiSearcher.LastIndexOfAnyVectorized( + ref Unsafe.As(ref MemoryMarshal.GetReference(span)), + span.Length, + ref _inverseAsciiBitmap); + } + + // If we've reached the end of the span or stopped at an ASCII character, we've found the result. + if ((uint)offset >= (uint)span.Length || char.IsAscii(span[offset])) + { + return offset; + } + + // Fall back to using the ProbabilisticMap. + span = span.Slice(0, offset + 1); + } + + return ProbabilisticMap.LastIndexOfAny( + ref Unsafe.As(ref _map), + ref MemoryMarshal.GetReference(span), + span.Length, + _values); + } + + internal override int LastIndexOfAnyExcept(ReadOnlySpan span) + { + if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128.Count) + { + // Do a regular LastIndexOfAnyExcept for the ASCII characters. The search will stop if we encounter a non-ASCII char. 
+ int offset = IndexOfAnyAsciiSearcher.LastIndexOfAnyVectorized( + ref Unsafe.As(ref MemoryMarshal.GetReference(span)), + span.Length, + ref _asciiBitmap); + + // If we've reached the end of the span or stopped at an ASCII character, we've found the result. + if ((uint)offset >= (uint)span.Length || char.IsAscii(span[offset])) + { + return offset; + } + + // Fall back to a simple char-by-char search. + span = span.Slice(0, offset + 1); + } + + return ProbabilisticMap.LastIndexOfAnySimpleLoop( + ref MemoryMarshal.GetReference(span), + span.Length, + _values); + } + } +} diff --git a/src/libraries/System.Private.CoreLib/src/System/SearchValues/SearchValues.cs b/src/libraries/System.Private.CoreLib/src/System/SearchValues/SearchValues.cs index 9070e51b09818c..b7fee2b7b12458 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SearchValues/SearchValues.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SearchValues/SearchValues.cs @@ -140,7 +140,29 @@ ref Unsafe.As(ref MemoryMarshal.GetReference(values)), return new Latin1CharSearchValues(values); } - return new ProbabilisticCharSearchValues(values); + scoped ReadOnlySpan probabilisticValues = values; + + if (Vector128.IsHardwareAccelerated && values.Length < 8) + { + // ProbabilisticMap does a Span.Contains check to confirm potential matches. + // If we have fewer than 8 values, pad them with existing ones to make the verification faster. + Span newValues = stackalloc char[8]; + newValues.Fill(values[0]); + values.CopyTo(newValues); + probabilisticValues = newValues; + } + + if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && minInclusive < 128) + { + // If we have both ASCII and non-ASCII characters, use an implementation that + // does an optimistic ASCII fast-path and then falls back to the ProbabilisticMap. + + return (Ssse3.IsSupported || PackedSimd.IsSupported) && probabilisticValues.Contains('\0') + ? 
new ProbabilisticWithAsciiCharSearchValues<IndexOfAnyAsciiSearcher.Ssse3AndWasmHandleZeroInNeedle>(probabilisticValues) + : new ProbabilisticWithAsciiCharSearchValues<IndexOfAnyAsciiSearcher.Default>(probabilisticValues); + } + + return new ProbabilisticCharSearchValues(probabilisticValues); } private static bool TryGetSingleRange(ReadOnlySpan values, out T minInclusive, out T maxInclusive)