-
Notifications
You must be signed in to change notification settings - Fork 5.2k
Add a SearchValues ProbabilisticMap implementation that uses an ASCII fast path #89155
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
stephentoub
merged 3 commits into
dotnet:main
from
MihaZupan:searchvalues-probmap-ascii
Jul 19, 2023
Merged
Changes from all commits
Commits
Show all changes
3 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
213 changes: 213 additions & 0 deletions
213
.../System.Private.CoreLib/src/System/SearchValues/ProbabilisticWithAsciiCharSearchValues.cs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,213 @@ | ||
| // Licensed to the .NET Foundation under one or more agreements. | ||
| // The .NET Foundation licenses this file to you under the MIT license. | ||
|
|
||
| using System.Diagnostics; | ||
| using System.Runtime.CompilerServices; | ||
| using System.Runtime.InteropServices; | ||
| using System.Runtime.Intrinsics; | ||
| using System.Runtime.Intrinsics.Wasm; | ||
| using System.Runtime.Intrinsics.X86; | ||
|
|
||
namespace System.Buffers
{
    /// <summary>
    /// A <see cref="SearchValues{T}"/> implementation for <see cref="char"/> sets that contain at least one
    /// ASCII character. Searches first run the vectorized <see cref="IndexOfAnyAsciiSearcher"/> over the input
    /// and only hand the remainder to the <see cref="ProbabilisticMap"/> (or a simple loop for the "Except"
    /// variants) once a non-ASCII character has been encountered.
    /// </summary>
    /// <typeparam name="TOptimizations">
    /// Specifies whether the ASCII bitmap of the values contains a 0, which determines which
    /// <see cref="IndexOfAnyAsciiSearcher"/> optimization mode has to be used on X86 and WASM.
    /// </typeparam>
    internal sealed class ProbabilisticWithAsciiCharSearchValues<TOptimizations> : SearchValues<char>
        where TOptimizations : struct, IndexOfAnyAsciiSearcher.IOptimizations
    {
        // The bitmaps and the map are handed to the search helpers by reference.
        private Vector256<byte> _asciiBitmap;
        private Vector256<byte> _inverseAsciiBitmap;
        private ProbabilisticMap _map;
        private readonly string _values;

        /// <summary>
        /// Builds the ASCII bitmap, its bitwise inverse, and the probabilistic map for <paramref name="values"/>.
        /// </summary>
        /// <param name="values">The set of characters to search for. Must contain at least one ASCII character.</param>
        public ProbabilisticWithAsciiCharSearchValues(scoped ReadOnlySpan<char> values)
        {
            Debug.Assert(IndexOfAnyAsciiSearcher.IsVectorizationSupported);
            Debug.Assert(values.ContainsAnyInRange((char)0, (char)127));

            _values = new string(values);
            _map = new ProbabilisticMap(_values);

            IndexOfAnyAsciiSearcher.ComputeBitmap(values, out _asciiBitmap, out _);
            _inverseAsciiBitmap = ~_asciiBitmap;
        }

        /// <summary>Returns a copy of the values this instance was created with.</summary>
        internal override char[] GetValues()
        {
            return _values.ToCharArray();
        }

        /// <summary>Checks whether <paramref name="value"/> is one of the values, using the probabilistic map.</summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        internal override bool ContainsCore(char value)
        {
            ref uint mapStart = ref Unsafe.As<ProbabilisticMap, uint>(ref _map);
            return ProbabilisticMap.Contains(ref mapStart, _values, value);
        }

        /// <summary>
        /// Returns the index of the first character in <paramref name="span"/> that is one of the values,
        /// or a negative number if there is none.
        /// </summary>
        internal override int IndexOfAny(ReadOnlySpan<char> span)
        {
            int skipped = 0;

            if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128<short>.Count)
            {
                skipped = IndexOfAsciiMatchOrNonAscii(span);

                // Only keep going if the vectorized search stopped inside the span on a non-ASCII character.
                // Reaching the end of the span, or stopping at an ASCII character, means we already have the result.
                bool stoppedAtNonAscii = (uint)skipped < (uint)span.Length && !char.IsAscii(span[skipped]);
                if (!stoppedAtNonAscii)
                {
                    return skipped;
                }

                // Let the ProbabilisticMap deal with the rest of the input.
                span = span.Slice(skipped);
            }

            int index = ProbabilisticMap.IndexOfAny(
                ref Unsafe.As<ProbabilisticMap, uint>(ref _map),
                ref MemoryMarshal.GetReference(span),
                span.Length,
                _values);

            // A match is relative to the sliced span, so add back the characters skipped by the ASCII search.
            return index >= 0 ? index + skipped : index;
        }

        /// <summary>
        /// Returns the index of the first character in <paramref name="span"/> that is not one of the values,
        /// or a negative number if there is none.
        /// </summary>
        internal override int IndexOfAnyExcept(ReadOnlySpan<char> span)
        {
            int skipped = 0;

            if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128<short>.Count)
            {
                // A regular IndexOfAnyExcept over the ASCII bitmap. The search also stops at any non-ASCII char.
                skipped = IndexOfAnyAsciiSearcher.IndexOfAnyVectorized<IndexOfAnyAsciiSearcher.Negate, TOptimizations>(
                    ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(span)),
                    span.Length,
                    ref _asciiBitmap);

                // Reaching the end of the span, or stopping at an ASCII character, means we already have the result.
                bool stoppedAtNonAscii = (uint)skipped < (uint)span.Length && !char.IsAscii(span[skipped]);
                if (!stoppedAtNonAscii)
                {
                    return skipped;
                }

                // Continue with a simple char-by-char search over the rest of the input.
                span = span.Slice(skipped);
            }

            int index = ProbabilisticMap.IndexOfAnySimpleLoop<IndexOfAnyAsciiSearcher.Negate>(
                ref MemoryMarshal.GetReference(span),
                span.Length,
                _values);

            // A match is relative to the sliced span, so add back the characters skipped by the ASCII search.
            return index >= 0 ? index + skipped : index;
        }

        /// <summary>
        /// Returns the index of the last character in <paramref name="span"/> that is one of the values,
        /// or a negative number if there is none.
        /// </summary>
        internal override int LastIndexOfAny(ReadOnlySpan<char> span)
        {
            if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128<short>.Count)
            {
                int stop = LastIndexOfAsciiMatchOrNonAscii(span);

                // Reaching the end of the search, or stopping at an ASCII character, means we already have the result.
                bool stoppedAtNonAscii = (uint)stop < (uint)span.Length && !char.IsAscii(span[stop]);
                if (!stoppedAtNonAscii)
                {
                    return stop;
                }

                // Let the ProbabilisticMap deal with everything up to and including the non-ASCII character.
                span = span.Slice(0, stop + 1);
            }

            return ProbabilisticMap.LastIndexOfAny(
                ref Unsafe.As<ProbabilisticMap, uint>(ref _map),
                ref MemoryMarshal.GetReference(span),
                span.Length,
                _values);
        }

        /// <summary>
        /// Returns the index of the last character in <paramref name="span"/> that is not one of the values,
        /// or a negative number if there is none.
        /// </summary>
        internal override int LastIndexOfAnyExcept(ReadOnlySpan<char> span)
        {
            if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128<short>.Count)
            {
                // A regular LastIndexOfAnyExcept over the ASCII bitmap. The search also stops at any non-ASCII char.
                int stop = IndexOfAnyAsciiSearcher.LastIndexOfAnyVectorized<IndexOfAnyAsciiSearcher.Negate, TOptimizations>(
                    ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(span)),
                    span.Length,
                    ref _asciiBitmap);

                // Reaching the end of the search, or stopping at an ASCII character, means we already have the result.
                bool stoppedAtNonAscii = (uint)stop < (uint)span.Length && !char.IsAscii(span[stop]);
                if (!stoppedAtNonAscii)
                {
                    return stop;
                }

                // Continue with a simple char-by-char search up to and including the non-ASCII character.
                span = span.Slice(0, stop + 1);
            }

            return ProbabilisticMap.LastIndexOfAnySimpleLoop<IndexOfAnyAsciiSearcher.Negate>(
                ref MemoryMarshal.GetReference(span),
                span.Length,
                _values);
        }

        /// <summary>
        /// Vectorized search for the first character that is either an ASCII character in the set or any
        /// non-ASCII character. This is done by searching with the inverted bitmap and the opposite search
        /// function (Negate instead of DontNegate).
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private int IndexOfAsciiMatchOrNonAscii(ReadOnlySpan<char> span)
        {
            // If the bitmap in use contains a 0, 'Ssse3AndWasmHandleZeroInNeedle' is required on X86 and WASM;
            // everything else should use 'Default'. 'TOptimizations' describes whether '_asciiBitmap' contains a 0.
            // Because the inverse bitmap is used here, 'Ssse3AndWasmHandleZeroInNeedle' is needed iff we're running
            // on X86/WASM and 'TOptimizations' is 'Default' (meaning the inverse bitmap definitely has a 0).
            Debug.Assert((_asciiBitmap[0] & 1) != (_inverseAsciiBitmap[0] & 1));

            ref short searchSpace = ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(span));

            if ((Ssse3.IsSupported || PackedSimd.IsSupported) && typeof(TOptimizations) == typeof(IndexOfAnyAsciiSearcher.Default))
            {
                Debug.Assert((_inverseAsciiBitmap[0] & 1) == 1, "The inverse bitmap did not contain a 0.");

                return IndexOfAnyAsciiSearcher.IndexOfAnyVectorized<IndexOfAnyAsciiSearcher.Negate, IndexOfAnyAsciiSearcher.Ssse3AndWasmHandleZeroInNeedle>(
                    ref searchSpace,
                    span.Length,
                    ref _inverseAsciiBitmap);
            }

            Debug.Assert(!(Ssse3.IsSupported || PackedSimd.IsSupported) || (_inverseAsciiBitmap[0] & 1) == 0,
                "The inverse bitmap contained a 0, but we're not using Ssse3AndWasmHandleZeroInNeedle.");

            return IndexOfAnyAsciiSearcher.IndexOfAnyVectorized<IndexOfAnyAsciiSearcher.Negate, IndexOfAnyAsciiSearcher.Default>(
                ref searchSpace,
                span.Length,
                ref _inverseAsciiBitmap);
        }

        /// <summary>
        /// Vectorized search for the last character that is either an ASCII character in the set or any
        /// non-ASCII character. This is done by searching with the inverted bitmap and the opposite search
        /// function (Negate instead of DontNegate).
        /// </summary>
        [MethodImpl(MethodImplOptions.AggressiveInlining)]
        private int LastIndexOfAsciiMatchOrNonAscii(ReadOnlySpan<char> span)
        {
            // If the bitmap in use contains a 0, 'Ssse3AndWasmHandleZeroInNeedle' is required on X86 and WASM;
            // everything else should use 'Default'. 'TOptimizations' describes whether '_asciiBitmap' contains a 0.
            // Because the inverse bitmap is used here, 'Ssse3AndWasmHandleZeroInNeedle' is needed iff we're running
            // on X86/WASM and 'TOptimizations' is 'Default' (meaning the inverse bitmap definitely has a 0).
            Debug.Assert((_asciiBitmap[0] & 1) != (_inverseAsciiBitmap[0] & 1));

            ref short searchSpace = ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(span));

            if ((Ssse3.IsSupported || PackedSimd.IsSupported) && typeof(TOptimizations) == typeof(IndexOfAnyAsciiSearcher.Default))
            {
                Debug.Assert((_inverseAsciiBitmap[0] & 1) == 1, "The inverse bitmap did not contain a 0.");

                return IndexOfAnyAsciiSearcher.LastIndexOfAnyVectorized<IndexOfAnyAsciiSearcher.Negate, IndexOfAnyAsciiSearcher.Ssse3AndWasmHandleZeroInNeedle>(
                    ref searchSpace,
                    span.Length,
                    ref _inverseAsciiBitmap);
            }

            Debug.Assert(!(Ssse3.IsSupported || PackedSimd.IsSupported) || (_inverseAsciiBitmap[0] & 1) == 0,
                "The inverse bitmap contained a 0, but we're not using Ssse3AndWasmHandleZeroInNeedle.");

            return IndexOfAnyAsciiSearcher.LastIndexOfAnyVectorized<IndexOfAnyAsciiSearcher.Negate, IndexOfAnyAsciiSearcher.Default>(
                ref searchSpace,
                span.Length,
                ref _inverseAsciiBitmap);
        }
    }
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.