Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,7 @@
<Compile Include="$(MSBuildThisFileDirectory)System\Index.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Reflection\Emit\ILGenerator.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\BitVector256.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\ProbabilisticWithAsciiCharSearchValues.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\SingleCharSearchValues.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\SingleByteSearchValues.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\SearchValues\Any2ByteSearchValues.cs" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,7 @@ internal static unsafe void ComputeBitmap<T>(ReadOnlySpan<T> values, out Vector2

if (value > 127)
{
// The values were modified concurrent with the call to SearchValues.Create
ThrowHelper.ThrowInvalidOperationException_InvalidOperation_EnumFailedVersion();
continue;
}

lookupLocal.Set(value);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;

namespace System.Buffers
{
Expand All @@ -14,16 +13,6 @@ internal sealed class ProbabilisticCharSearchValues : SearchValues<char>

// Stores the values as a string and builds the ProbabilisticMap over them.
public ProbabilisticCharSearchValues(scoped ReadOnlySpan<char> values)
{
    // ProbabilisticMap confirms potential matches with a Span.Contains check.
    // When fewer than 8 values are supplied, the spare slots are filled with copies
    // of the first value so that the verification step is faster.
    bool padToEight = Vector128.IsHardwareAccelerated && values.Length < 8;

    if (padToEight)
    {
        Span<char> padded = stackalloc char[8];
        char filler = values[0];
        padded.Fill(filler);
        values.CopyTo(padded);
        values = padded;
    }

    string valuesString = new string(values);
    _values = valuesString;
    _map = new ProbabilisticMap(valuesString);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,213 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System.Diagnostics;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.Wasm;
using System.Runtime.Intrinsics.X86;

namespace System.Buffers
{
internal sealed class ProbabilisticWithAsciiCharSearchValues<TOptimizations> : SearchValues<char>
where TOptimizations : struct, IndexOfAnyAsciiSearcher.IOptimizations
{
// Bitmap produced by IndexOfAnyAsciiSearcher.ComputeBitmap from the values given to the constructor.
// Not readonly because it is passed by 'ref' to the vectorized search helpers.
private Vector256<byte> _asciiBitmap;
// Bitwise complement of _asciiBitmap. IndexOfAny/LastIndexOfAny search with this bitmap and the
// 'Negate' search function. Also passed by 'ref', hence not readonly.
private Vector256<byte> _inverseAsciiBitmap;
// Map built from _values. Used by ContainsCore and as the fallback for IndexOfAny/LastIndexOfAny.
// Passed by 'ref' (reinterpreted as uint), hence not readonly.
private ProbabilisticMap _map;
// String copy of the values given to the constructor.
private readonly string _values;

// Builds the ASCII bitmap, its complement, and the ProbabilisticMap for the given values.
// This type is only expected to be used when vectorization is supported and at least
// one of the values is in the ASCII range (both conditions are asserted below).
public ProbabilisticWithAsciiCharSearchValues(scoped ReadOnlySpan<char> values)
{
    Debug.Assert(IndexOfAnyAsciiSearcher.IsVectorizationSupported);
    Debug.Assert(values.ContainsAnyInRange('\0', '\u007F'));

    // The second output of ComputeBitmap is not needed by this type.
    IndexOfAnyAsciiSearcher.ComputeBitmap(values, out Vector256<byte> asciiBitmap, out _);
    _asciiBitmap = asciiBitmap;
    _inverseAsciiBitmap = ~asciiBitmap;

    string valuesString = new string(values);
    _values = valuesString;
    _map = new ProbabilisticMap(valuesString);
}

// Returns a new array containing the characters stored in _values.
internal override char[] GetValues()
{
    return _values.ToCharArray();
}

// Checks a single character against the set via ProbabilisticMap.Contains.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
internal override bool ContainsCore(char value)
{
    // ProbabilisticMap.Contains takes the map reinterpreted as a 'ref uint'.
    ref uint charMap = ref Unsafe.As<ProbabilisticMap, uint>(ref _map);
    return ProbabilisticMap.Contains(ref charMap, _values, value);
}

// Searches 'span' from the start: a vectorized ASCII pass runs first (when supported and the span
// is long enough), and the ProbabilisticMap handles the remainder if that pass stops at a non-ASCII char.
internal override int IndexOfAny(ReadOnlySpan<char> span)
{
    // Number of leading characters already ruled out by the vectorized ASCII pass.
    int skipped = 0;

    if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128<short>.Count)
    {
        // IndexOfAnyAsciiSearcher is used to find the first ASCII character in the set, or any non-ASCII character.
        // That is achieved by searching with the inverted bitmap and the opposite search function
        // (Negate instead of DontNegate).

        // A bitmap that contains a 0 requires 'Ssse3AndWasmHandleZeroInNeedle' on X86 and WASM; everything else
        // should use 'Default'. 'TOptimizations' describes whether '_asciiBitmap' contains a 0, and since the
        // inverse bitmap is searched here, the choice flips: 'Ssse3AndWasmHandleZeroInNeedle' is needed iff we're
        // running on X86/WASM and 'TOptimizations' is 'Default' (the inverse bitmap then definitely has a 0).
        Debug.Assert((_asciiBitmap[0] & 1) != (_inverseAsciiBitmap[0] & 1));

        ref short searchSpace = ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(span));

        if (!(Ssse3.IsSupported || PackedSimd.IsSupported) || typeof(TOptimizations) != typeof(IndexOfAnyAsciiSearcher.Default))
        {
            Debug.Assert(!(Ssse3.IsSupported || PackedSimd.IsSupported) || (_inverseAsciiBitmap[0] & 1) == 0,
                "The inverse bitmap contained a 0, but we're not using Ssse3AndWasmHandleZeroInNeedle.");

            skipped = IndexOfAnyAsciiSearcher.IndexOfAnyVectorized<IndexOfAnyAsciiSearcher.Negate, IndexOfAnyAsciiSearcher.Default>(
                ref searchSpace,
                span.Length,
                ref _inverseAsciiBitmap);
        }
        else
        {
            Debug.Assert((_inverseAsciiBitmap[0] & 1) == 1, "The inverse bitmap did not contain a 0.");

            skipped = IndexOfAnyAsciiSearcher.IndexOfAnyVectorized<IndexOfAnyAsciiSearcher.Negate, IndexOfAnyAsciiSearcher.Ssse3AndWasmHandleZeroInNeedle>(
                ref searchSpace,
                span.Length,
                ref _inverseAsciiBitmap);
        }

        // An offset outside of the span means the whole span was consumed, and an ASCII character
        // at the offset is a confirmed match. Either way the vectorized result is final.
        bool outsideSpan = (uint)skipped >= (uint)span.Length;
        if (outsideSpan || char.IsAscii(span[skipped]))
        {
            return skipped;
        }

        // The search stopped at a non-ASCII character: continue from there with the ProbabilisticMap.
        span = span.Slice(skipped);
    }

    int index = ProbabilisticMap.IndexOfAny(
        ref Unsafe.As<ProbabilisticMap, uint>(ref _map),
        ref MemoryMarshal.GetReference(span),
        span.Length,
        _values);

    // A non-negative index is relative to the sliced span, so add back the characters skipped above.
    // A negative result is returned unchanged.
    return index >= 0 ? index + skipped : index;
}

// Searches 'span' from the start for a character outside of the set: a vectorized ASCII pass runs first
// (when supported and the span is long enough), followed by a simple loop if that pass stops at a non-ASCII char.
internal override int IndexOfAnyExcept(ReadOnlySpan<char> span)
{
    // Number of leading characters already handled by the vectorized ASCII pass.
    int skipped = 0;

    if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128<short>.Count)
    {
        // A regular IndexOfAnyExcept over the ASCII bitmap. The search also stops at any non-ASCII char.
        ref short searchSpace = ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(span));

        skipped = IndexOfAnyAsciiSearcher.IndexOfAnyVectorized<IndexOfAnyAsciiSearcher.Negate, TOptimizations>(
            ref searchSpace,
            span.Length,
            ref _asciiBitmap);

        // An offset outside of the span means the whole span was consumed, and an ASCII character
        // at the offset is a confirmed result. Either way the vectorized result is final.
        bool outsideSpan = (uint)skipped >= (uint)span.Length;
        if (outsideSpan || char.IsAscii(span[skipped]))
        {
            return skipped;
        }

        // The search stopped at a non-ASCII character: continue from there one char at a time.
        span = span.Slice(skipped);
    }

    int index = ProbabilisticMap.IndexOfAnySimpleLoop<IndexOfAnyAsciiSearcher.Negate>(
        ref MemoryMarshal.GetReference(span),
        span.Length,
        _values);

    // A non-negative index is relative to the sliced span, so add back the characters skipped above.
    // A negative result is returned unchanged.
    return index >= 0 ? index + skipped : index;
}

// Searches 'span' from the end: a vectorized ASCII pass runs first (when supported and the span
// is long enough), and the ProbabilisticMap handles the remainder if that pass stops at a non-ASCII char.
internal override int LastIndexOfAny(ReadOnlySpan<char> span)
{
    if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128<short>.Count)
    {
        // IndexOfAnyAsciiSearcher is used to find the last ASCII character in the set, or any non-ASCII character.
        // That is achieved by searching with the inverted bitmap and the opposite search function
        // (Negate instead of DontNegate).

        // A bitmap that contains a 0 requires 'Ssse3AndWasmHandleZeroInNeedle' on X86 and WASM; everything else
        // should use 'Default'. 'TOptimizations' describes whether '_asciiBitmap' contains a 0, and since the
        // inverse bitmap is searched here, the choice flips: 'Ssse3AndWasmHandleZeroInNeedle' is needed iff we're
        // running on X86/WASM and 'TOptimizations' is 'Default' (the inverse bitmap then definitely has a 0).
        Debug.Assert((_asciiBitmap[0] & 1) != (_inverseAsciiBitmap[0] & 1));

        ref short searchSpace = ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(span));
        int candidate;

        if (!(Ssse3.IsSupported || PackedSimd.IsSupported) || typeof(TOptimizations) != typeof(IndexOfAnyAsciiSearcher.Default))
        {
            Debug.Assert(!(Ssse3.IsSupported || PackedSimd.IsSupported) || (_inverseAsciiBitmap[0] & 1) == 0,
                "The inverse bitmap contained a 0, but we're not using Ssse3AndWasmHandleZeroInNeedle.");

            candidate = IndexOfAnyAsciiSearcher.LastIndexOfAnyVectorized<IndexOfAnyAsciiSearcher.Negate, IndexOfAnyAsciiSearcher.Default>(
                ref searchSpace,
                span.Length,
                ref _inverseAsciiBitmap);
        }
        else
        {
            Debug.Assert((_inverseAsciiBitmap[0] & 1) == 1, "The inverse bitmap did not contain a 0.");

            candidate = IndexOfAnyAsciiSearcher.LastIndexOfAnyVectorized<IndexOfAnyAsciiSearcher.Negate, IndexOfAnyAsciiSearcher.Ssse3AndWasmHandleZeroInNeedle>(
                ref searchSpace,
                span.Length,
                ref _inverseAsciiBitmap);
        }

        // An offset outside of the span means the whole span was consumed, and an ASCII character
        // at the offset is a confirmed match. Either way the vectorized result is final.
        bool outsideSpan = (uint)candidate >= (uint)span.Length;
        if (outsideSpan || char.IsAscii(span[candidate]))
        {
            return candidate;
        }

        // The search stopped at a non-ASCII character: keep everything up to and including it
        // and let the ProbabilisticMap finish. The slice starts at 0, so indices need no adjustment.
        span = span.Slice(0, candidate + 1);
    }

    ref uint charMap = ref Unsafe.As<ProbabilisticMap, uint>(ref _map);

    return ProbabilisticMap.LastIndexOfAny(
        ref charMap,
        ref MemoryMarshal.GetReference(span),
        span.Length,
        _values);
}

// Searches 'span' from the end for a character outside of the set: a vectorized ASCII pass runs first
// (when supported and the span is long enough), followed by a simple loop if that pass stops at a non-ASCII char.
internal override int LastIndexOfAnyExcept(ReadOnlySpan<char> span)
{
    if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && span.Length >= Vector128<short>.Count)
    {
        // A regular LastIndexOfAnyExcept over the ASCII bitmap. The search also stops at any non-ASCII char.
        ref short searchSpace = ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(span));

        int candidate = IndexOfAnyAsciiSearcher.LastIndexOfAnyVectorized<IndexOfAnyAsciiSearcher.Negate, TOptimizations>(
            ref searchSpace,
            span.Length,
            ref _asciiBitmap);

        // An offset outside of the span means the whole span was consumed, and an ASCII character
        // at the offset is a confirmed result. Either way the vectorized result is final.
        bool outsideSpan = (uint)candidate >= (uint)span.Length;
        if (outsideSpan || char.IsAscii(span[candidate]))
        {
            return candidate;
        }

        // The search stopped at a non-ASCII character: keep everything up to and including it
        // and continue one char at a time. The slice starts at 0, so indices need no adjustment.
        span = span.Slice(0, candidate + 1);
    }

    return ProbabilisticMap.LastIndexOfAnySimpleLoop<IndexOfAnyAsciiSearcher.Negate>(
        ref MemoryMarshal.GetReference(span),
        span.Length,
        _values);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,29 @@ ref Unsafe.As<char, short>(ref MemoryMarshal.GetReference(values)),
return new Latin1CharSearchValues(values);
}

return new ProbabilisticCharSearchValues(values);
scoped ReadOnlySpan<char> probabilisticValues = values;

if (Vector128.IsHardwareAccelerated && values.Length < 8)
{
// ProbabilisticMap does a Span.Contains check to confirm potential matches.
// If we have fewer than 8 values, pad them with existing ones to make the verification faster.
Span<char> newValues = stackalloc char[8];
newValues.Fill(values[0]);
values.CopyTo(newValues);
probabilisticValues = newValues;
}

if (IndexOfAnyAsciiSearcher.IsVectorizationSupported && minInclusive < 128)
{
// If we have both ASCII and non-ASCII characters, use an implementation that
// does an optimistic ASCII fast-path and then falls back to the ProbabilisticMap.

return (Ssse3.IsSupported || PackedSimd.IsSupported) && probabilisticValues.Contains('\0')
? new ProbabilisticWithAsciiCharSearchValues<IndexOfAnyAsciiSearcher.Ssse3AndWasmHandleZeroInNeedle>(probabilisticValues)
: new ProbabilisticWithAsciiCharSearchValues<IndexOfAnyAsciiSearcher.Default>(probabilisticValues);
}

return new ProbabilisticCharSearchValues(probabilisticValues);
}

private static bool TryGetSingleRange<T>(ReadOnlySpan<T> values, out T minInclusive, out T maxInclusive)
Expand Down