-
Notifications
You must be signed in to change notification settings - Fork 5.2k
Apply hardware intrinsics to BitArray.*Shift
#113299
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
e29dedd
a01656a
a482701
e96a2b8
7681ebc
1c77a92
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -514,51 +514,94 @@ public BitArray RightShift(int count) | |||||||
| return this; | ||||||||
| } | ||||||||
|
|
||||||||
| Span<int> intSpan = MemoryMarshal.Cast<byte, int>((Span<byte>)_array); | ||||||||
|
|
||||||||
| Span<byte> thisSpan = new Span<byte>(_array, 0, GetByteArrayLengthFromBitLength(_bitLength)); | ||||||||
| int toIndex = 0; | ||||||||
| int ints = GetInt32ArrayLengthFromBitLength(_bitLength); | ||||||||
|
|
||||||||
| if (count < _bitLength) | ||||||||
| { | ||||||||
| // We can not use Math.DivRem without taking a dependency on System.Runtime.Extensions | ||||||||
| (int fromIndex, int shiftCount) = Math.DivRem(count, 32); | ||||||||
| int extraBits = (int)((uint)_bitLength % 32); | ||||||||
| (int fromIndex, int shiftCount) = Math.DivRem(count, BitsPerByte); | ||||||||
| if (shiftCount == 0) | ||||||||
| { | ||||||||
| // Cannot use `(1u << extraBits) - 1u` as the mask | ||||||||
| // because for extraBits == 0, we need the mask to be 111...111, not 0. | ||||||||
| // In that case, we are shifting a uint by 32, which could be considered undefined. | ||||||||
| // The result of a shift operation is undefined ... if the right operand | ||||||||
| // is greater than or equal to the width in bits of the promoted left operand, | ||||||||
| // https://learn.microsoft.com/cpp/c-language/bitwise-shift-operators?view=vs-2017 | ||||||||
| // However, the compiler protects us from undefined behaviour by constraining the | ||||||||
| // right operand to between 0 and width - 1 (inclusive), i.e. right_operand = (right_operand % width). | ||||||||
| uint mask = uint.MaxValue >> (BitsPerInt32 - extraBits); | ||||||||
| intSpan[ints - 1] &= ReverseIfBE((int)mask); | ||||||||
|
|
||||||||
| intSpan.Slice((int)fromIndex, ints - fromIndex).CopyTo(intSpan); | ||||||||
| toIndex = ints - fromIndex; | ||||||||
| thisSpan.Slice(fromIndex).CopyTo(thisSpan); | ||||||||
| toIndex = thisSpan.Length - fromIndex; | ||||||||
| } | ||||||||
| else | ||||||||
| { | ||||||||
| int lastIndex = ints - 1; | ||||||||
| if (Vector512.IsHardwareAccelerated) | ||||||||
| { | ||||||||
| toIndex = Apply<Vector512<byte>>(shiftCount, fromIndex, thisSpan); | ||||||||
| } | ||||||||
| else if (Vector256.IsHardwareAccelerated) | ||||||||
| { | ||||||||
| toIndex = Apply<Vector256<byte>>(shiftCount, fromIndex, thisSpan); | ||||||||
| } | ||||||||
| else if (Vector128.IsHardwareAccelerated) | ||||||||
| { | ||||||||
| toIndex = Apply<Vector128<byte>>(shiftCount, fromIndex, thisSpan); | ||||||||
| } | ||||||||
| fromIndex += toIndex; | ||||||||
|
|
||||||||
| int carryCount = BitsPerByte - shiftCount; | ||||||||
|
|
||||||||
| ref byte p = ref MemoryMarshal.GetReference(thisSpan); | ||||||||
|
|
||||||||
| const uint shiftUnit = 0x01010101u; | ||||||||
| uint shiftMask = (shiftUnit << carryCount) - shiftUnit; | ||||||||
| uint carryMask = ~shiftMask; | ||||||||
|
|
||||||||
| while (fromIndex < lastIndex) | ||||||||
| while (fromIndex < thisSpan.Length - 4) | ||||||||
| { | ||||||||
| uint right = (uint)ReverseIfBE(intSpan[fromIndex]) >> shiftCount; | ||||||||
| int left = ReverseIfBE(intSpan[++fromIndex]) << (BitsPerInt32 - shiftCount); | ||||||||
| intSpan[toIndex++] = ReverseIfBE(left | (int)right); | ||||||||
| uint lo = (Unsafe.ReadUnaligned<uint>(ref Unsafe.AddByteOffset(ref p, (uint)fromIndex)) >>> shiftCount) & shiftMask; | ||||||||
| uint hi = (Unsafe.ReadUnaligned<uint>(ref Unsafe.AddByteOffset(ref p, (uint)(fromIndex + 1))) << carryCount) & carryMask; | ||||||||
| uint result = hi | lo; | ||||||||
| Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref p, toIndex), result); | ||||||||
|
|
||||||||
| fromIndex += 4; | ||||||||
| toIndex += 4; | ||||||||
| } | ||||||||
|
|
||||||||
| uint mask = uint.MaxValue >> (BitsPerInt32 - extraBits); | ||||||||
| mask &= (uint)ReverseIfBE(intSpan[fromIndex]); | ||||||||
| intSpan[toIndex++] = ReverseIfBE((int)(mask >> shiftCount)); | ||||||||
| while (fromIndex < thisSpan.Length) | ||||||||
| { | ||||||||
| int lo = thisSpan[fromIndex] >>> shiftCount; | ||||||||
| int hi = | ||||||||
| fromIndex + 1 < thisSpan.Length | ||||||||
| ? thisSpan[fromIndex + 1] << carryCount | ||||||||
| : 0; | ||||||||
|
|
||||||||
| thisSpan[toIndex] = (byte)(hi | lo); | ||||||||
|
|
||||||||
| fromIndex++; | ||||||||
| toIndex++; | ||||||||
| } | ||||||||
| } | ||||||||
| } | ||||||||
|
|
||||||||
| intSpan.Slice(toIndex, ints - toIndex).Clear(); | ||||||||
| thisSpan.Slice(toIndex).Clear(); | ||||||||
| _version++; | ||||||||
| return this; | ||||||||
|
|
||||||||
| [MethodImpl(MethodImplOptions.AggressiveInlining)] | ||||||||
| static int Apply<TVector>(int shiftCount, int fromIndex, Span<byte> thisSpan) | ||||||||
| where TVector : ISimdVector<TVector, byte> | ||||||||
| { | ||||||||
| ref byte p = ref MemoryMarshal.GetReference(thisSpan); | ||||||||
| int carryCount = BitsPerByte - shiftCount; | ||||||||
|
|
||||||||
| int toIndex = 0; | ||||||||
|
|
||||||||
| while (fromIndex <= thisSpan.Length - (TVector.ElementCount + 1)) | ||||||||
| { | ||||||||
| TVector lo = TVector.LoadUnsafe(ref p, (uint)fromIndex) >>> shiftCount; | ||||||||
| TVector hi = TVector.LoadUnsafe(ref p, (uint)(fromIndex + 1)) << carryCount; | ||||||||
| TVector result = lo | hi; | ||||||||
| result.StoreUnsafe(ref p, (uint)toIndex); | ||||||||
|
|
||||||||
| fromIndex += TVector.ElementCount; | ||||||||
| toIndex += TVector.ElementCount; | ||||||||
| } | ||||||||
|
|
||||||||
| return toIndex; | ||||||||
| } | ||||||||
| } | ||||||||
|
|
||||||||
| /// <summary> | ||||||||
|
|
@@ -577,41 +620,95 @@ public BitArray LeftShift(int count) | |||||||
| return this; | ||||||||
| } | ||||||||
|
|
||||||||
| Span<int> intSpan = MemoryMarshal.Cast<byte, int>((Span<byte>)_array); | ||||||||
| Span<byte> thisSpan = new Span<byte>(_array, 0, GetByteArrayLengthFromBitLength(_bitLength)); | ||||||||
|
|
||||||||
| int lengthToClear; | ||||||||
| if (count < _bitLength) | ||||||||
| { | ||||||||
| int lastIndex = (int)((uint)(_bitLength - 1) / BitsPerInt32); | ||||||||
|
|
||||||||
| (lengthToClear, int shiftCount) = Math.DivRem(count, BitsPerInt32); | ||||||||
| (lengthToClear, int shiftCount) = Math.DivRem(count, BitsPerByte); | ||||||||
|
|
||||||||
| if (shiftCount == 0) | ||||||||
| { | ||||||||
| intSpan.Slice(0, lastIndex + 1 - lengthToClear).CopyTo(intSpan.Slice(lengthToClear)); | ||||||||
| thisSpan.Slice(0, thisSpan.Length - lengthToClear).CopyTo(thisSpan.Slice(lengthToClear)); | ||||||||
| } | ||||||||
| else | ||||||||
| { | ||||||||
| int fromindex = lastIndex - lengthToClear; | ||||||||
| int toIndex = thisSpan.Length; | ||||||||
| int fromIndex = toIndex - lengthToClear; | ||||||||
|
|
||||||||
| if (Vector512.IsHardwareAccelerated) | ||||||||
| { | ||||||||
| toIndex = Apply<Vector512<byte>>(shiftCount, fromIndex, thisSpan); | ||||||||
| } | ||||||||
| else if (Vector256.IsHardwareAccelerated) | ||||||||
| { | ||||||||
| toIndex = Apply<Vector256<byte>>(shiftCount, fromIndex, thisSpan); | ||||||||
| } | ||||||||
| else if (Vector128.IsHardwareAccelerated) | ||||||||
| { | ||||||||
| toIndex = Apply<Vector128<byte>>(shiftCount, fromIndex, thisSpan); | ||||||||
| } | ||||||||
| fromIndex = toIndex - lengthToClear; | ||||||||
|
|
||||||||
| int carryCount = BitsPerByte - shiftCount; | ||||||||
|
|
||||||||
| ref byte p = ref MemoryMarshal.GetReference(thisSpan); | ||||||||
|
|
||||||||
| const uint shiftUnit = 0x01010101u; | ||||||||
| uint carryMask = (shiftUnit << shiftCount) - shiftUnit; | ||||||||
| uint shiftMask = ~carryMask; | ||||||||
|
|
||||||||
| while (fromIndex >= 5) | ||||||||
| { | ||||||||
| uint lo = (Unsafe.ReadUnaligned<uint>(ref Unsafe.AddByteOffset(ref p, (uint)(fromIndex -= 4))) << shiftCount) & shiftMask; | ||||||||
|
||||||||
| uint lo = (Unsafe.ReadUnaligned<uint>(ref Unsafe.AddByteOffset(ref p, (uint)(fromIndex -= 4))) << shiftCount) & shiftMask; | |
| fromIndex -= 4; | |
| uint lo = (Unsafe.ReadUnaligned<uint>(ref Unsafe.AddByteOffset(ref p, (uint)fromIndex)) << shiftCount) & shiftMask; |
Copilot
AI
Oct 24, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[nitpick] The decrement operation `toIndex -= 4` is embedded within the offset calculation, which reduces readability. Consider separating this into two statements: first decrement `toIndex`, then perform the write operation.
| Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref p, toIndex -= 4), result); | |
| toIndex -= 4; | |
| Unsafe.WriteUnaligned(ref Unsafe.AddByteOffset(ref p, toIndex), result); |
Copilot
AI
Oct 24, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[nitpick] The decrement operation `fromIndex -= TVector.ElementCount` is embedded within the offset calculation, which reduces readability. Consider separating this into two statements: first decrement `fromIndex`, then perform the load operation.
| TVector hi = TVector.LoadUnsafe(ref p, (nuint)(fromIndex -= TVector.ElementCount)) << shiftCount; | |
| fromIndex -= TVector.ElementCount; | |
| TVector hi = TVector.LoadUnsafe(ref p, (nuint)fromIndex) << shiftCount; |
Copilot
AI
Oct 24, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[nitpick] The decrement operation `toIndex -= TVector.ElementCount` is embedded within the offset calculation, which reduces readability. Consider separating this into two statements: first decrement `toIndex`, then perform the store operation.
| result.StoreUnsafe(ref p, (nuint)(toIndex -= TVector.ElementCount)); | |
| toIndex -= TVector.ElementCount; | |
| result.StoreUnsafe(ref p, (nuint)toIndex); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Corrected spelling of 'bitOffeset' to 'bitOffset'.