From ade7f981a987a3ac04f7b0adb0757edc32b50221 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 26 Jan 2024 15:43:13 -0500 Subject: [PATCH 1/4] Add TensorPrimitives.ConvertTruncating/Saturating/Checked --- .../ref/System.Numerics.Tensors.netcore.cs | 3 + .../TensorPrimitives.Single.netcore.cs | 6 +- .../Tensors/netcore/TensorPrimitives.T.cs | 256 +- .../netcore/TensorPrimitives.netcore.cs | 5214 +++++++++-------- .../tests/TensorPrimitives.Generic.cs | 193 + 5 files changed, 3363 insertions(+), 2309 deletions(-) diff --git a/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs b/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs index 031ba109ba6b57..de78d145524b0b 100644 --- a/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/ref/System.Numerics.Tensors.netcore.cs @@ -35,6 +35,9 @@ public static void BitwiseOr(System.ReadOnlySpan x, System.ReadOnlySpan public static void BitwiseOr(System.ReadOnlySpan x, T y, System.Span destination) where T : System.Numerics.IBitwiseOperators { } public static void Cbrt(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IRootFunctions { } public static void Ceiling(System.ReadOnlySpan x, System.Span destination) where T : System.Numerics.IFloatingPoint { } + public static void ConvertChecked(System.ReadOnlySpan source, System.Span destination) where TFrom : System.Numerics.INumberBase where TTo : System.Numerics.INumberBase { } + public static void ConvertSaturating(System.ReadOnlySpan source, System.Span destination) where TFrom : System.Numerics.INumberBase where TTo : System.Numerics.INumberBase { } + public static void ConvertTruncating(System.ReadOnlySpan source, System.Span destination) where TFrom : System.Numerics.INumberBase where TTo : System.Numerics.INumberBase { } public static void ConvertToHalf(System.ReadOnlySpan source, System.Span destination) { } public static void 
ConvertToSingle(System.ReadOnlySpan source, System.Span destination) { } public static void CopySign(System.ReadOnlySpan x, System.ReadOnlySpan sign, System.Span destination) where T : System.Numerics.INumber { } diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Single.netcore.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Single.netcore.cs index 1148639d48be02..32723d2b3c3c5e 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Single.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.Single.netcore.cs @@ -43,7 +43,7 @@ public static unsafe partial class TensorPrimitives { private static void InvokeSpanIntoSpan( ReadOnlySpan x, Span destination) - where TSingleUnaryOperator : struct, IUnaryOperator => + where TSingleUnaryOperator : struct, IUnaryOperator => InvokeSpanIntoSpan(x, destination); private static void InvokeSpanSpanIntoSpan( @@ -58,7 +58,7 @@ private static void InvokeSpanScalarIntoSpan( private static unsafe void InvokeSpanScalarIntoSpan( ReadOnlySpan x, float y, Span destination) - where TSingleTransformOperator : struct, IUnaryOperator + where TSingleTransformOperator : struct, IUnaryOperator where TSingleBinaryOperator : struct, IBinaryOperator => InvokeSpanScalarIntoSpan(x, y, destination); @@ -79,7 +79,7 @@ private static void InvokeSpanScalarSpanIntoSpan( private static unsafe float Aggregate( ReadOnlySpan x) - where TSingleTransformOperator : struct, IUnaryOperator + where TSingleTransformOperator : struct, IUnaryOperator where TSingleAggregationOperator : struct, IAggregationOperator => Aggregate(x); diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs index 
4009257b22d36f..0562d74cc9dc49 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.T.cs @@ -1,6 +1,8 @@ // Licensed to the .NET Foundation under one or more agreements. // The .NET Foundation licenses this file to you under the MIT license. +using System.Runtime.CompilerServices; + namespace System.Numerics.Tensors { /// Performs primitive tensor operations over spans of memory. @@ -488,6 +490,249 @@ public static void Ceiling(ReadOnlySpan x, Span destination) where T : IFloatingPoint => InvokeSpanIntoSpan>(x, destination); + /// + /// Copies to , converting each + /// value to a value. + /// + /// The source span from which to copy values. + /// The destination span into which the converted values should be written. + /// Destination is too short. + /// + /// + /// This method effectively computes [i] = TTo.CreateChecked([i]). + /// + /// + public static void ConvertChecked(ReadOnlySpan source, Span destination) + where TFrom : INumberBase + where TTo : INumberBase + { + if (!TryConvertUniversal(source, destination)) + { + InvokeSpanIntoSpan>(source, destination); + } + } + + /// + /// Copies to , converting each + /// value to a value. + /// + /// The source span from which to copy values. + /// The destination span into which the converted values should be written. + /// Destination is too short. + /// + /// + /// This method effectively computes [i] = TTo.CreateSaturating([i]). + /// + /// + public static void ConvertSaturating(ReadOnlySpan source, Span destination) + where TFrom : INumberBase + where TTo : INumberBase + { + if (!TryConvertUniversal(source, destination)) + { + InvokeSpanIntoSpan>(source, destination); + } + } + + /// + /// Copies to , converting each + /// value to a value. + /// + /// The source span from which to copy values. 
+ /// The destination span into which the converted values should be written. + /// Destination is too short. + /// + /// + /// This method effectively computes [i] = TTo.CreateTruncating([i]). + /// + /// + public static void ConvertTruncating(ReadOnlySpan source, Span destination) + where TFrom : INumberBase + where TTo : INumberBase + { + if (TryConvertUniversal(source, destination)) + { + return; + } + + if (((typeof(TFrom) == typeof(byte) || typeof(TFrom) == typeof(sbyte)) && (typeof(TTo) == typeof(byte) || typeof(TTo) == typeof(sbyte))) || + ((typeof(TFrom) == typeof(ushort) || typeof(TFrom) == typeof(short)) && (typeof(TTo) == typeof(ushort) || typeof(TTo) == typeof(short))) || + ((IsUInt32Like() || IsInt32Like()) && (IsUInt32Like() || IsInt32Like())) || + ((IsUInt64Like() || IsInt64Like()) && (IsUInt64Like() || IsInt64Like()))) + { + source.CopyTo(Rename(destination)); + return; + } + + if (typeof(TFrom) == typeof(float) && IsUInt32Like()) + { + InvokeSpanIntoSpan(Rename(source), Rename(destination)); + return; + } + + if (typeof(TFrom) == typeof(float) && IsInt32Like()) + { + InvokeSpanIntoSpan(Rename(source), Rename(destination)); + return; + } + + if (typeof(TFrom) == typeof(double) && IsUInt64Like()) + { + InvokeSpanIntoSpan(Rename(source), Rename(destination)); + return; + } + + if (typeof(TFrom) == typeof(double) && IsInt64Like()) + { + InvokeSpanIntoSpan(Rename(source), Rename(destination)); + return; + } + + if (typeof(TFrom) == typeof(ushort) && typeof(TTo) == typeof(byte)) + { + InvokeSpanIntoSpan_2to1(Rename(source), Rename(destination)); + return; + } + + if (typeof(TFrom) == typeof(short) && typeof(TTo) == typeof(sbyte)) + { + InvokeSpanIntoSpan_2to1(Rename(source), Rename(destination)); + return; + } + + if (IsUInt32Like() && typeof(TTo) == typeof(ushort)) + { + InvokeSpanIntoSpan_2to1(Rename(source), Rename(destination)); + return; + } + + if (IsInt32Like() && typeof(TTo) == typeof(short)) + { + InvokeSpanIntoSpan_2to1(Rename(source), 
Rename(destination)); + return; + } + + if (IsUInt64Like() && IsUInt32Like()) + { + InvokeSpanIntoSpan_2to1(Rename(source), Rename(destination)); + return; + } + + if (IsInt64Like() && IsInt32Like()) + { + InvokeSpanIntoSpan_2to1(Rename(source), Rename(destination)); + return; + } + + InvokeSpanIntoSpan>(source, destination); + } + + /// Performs conversions that are the same regardless of checked, truncating, or saturation. + [MethodImpl(MethodImplOptions.AggressiveInlining)] // at most one of the branches will be kept + private static bool TryConvertUniversal(ReadOnlySpan source, Span destination) + where TFrom : INumberBase + where TTo : INumberBase + { + if (typeof(TFrom) == typeof(TTo)) + { + if (source.Length > destination.Length) + { + ThrowHelper.ThrowArgument_DestinationTooShort(); + } + + ValidateInputOutputSpanNonOverlapping(source, Rename(destination)); + + source.CopyTo(Rename(destination)); + return true; + } + + if (IsInt32Like() && typeof(TTo) == typeof(float)) + { + InvokeSpanIntoSpan(Rename(source), Rename(destination)); + return true; + } + + if (IsUInt32Like() && typeof(TTo) == typeof(float)) + { + InvokeSpanIntoSpan(Rename(source), Rename(destination)); + return true; + } + + if (IsInt64Like() && typeof(TTo) == typeof(double)) + { + InvokeSpanIntoSpan(Rename(source), Rename(destination)); + return true; + } + + if (IsUInt64Like() && typeof(TTo) == typeof(double)) + { + InvokeSpanIntoSpan(Rename(source), Rename(destination)); + return true; + } + + if (typeof(TFrom) == typeof(float) && typeof(TTo) == typeof(Half)) + { + ConvertToHalf(Rename(source), Rename(destination)); + return true; + } + + if (typeof(TFrom) == typeof(Half) && typeof(TTo) == typeof(float)) + { + ConvertToSingle(Rename(source), Rename(destination)); + return true; + } + + if (typeof(TFrom) == typeof(float) && typeof(TTo) == typeof(double)) + { + InvokeSpanIntoSpan_1to2(Rename(source), Rename(destination)); + return true; + } + + if (typeof(TFrom) == typeof(double) && 
typeof(TTo) == typeof(float)) + { + InvokeSpanIntoSpan_2to1(Rename(source), Rename(destination)); + return true; + } + + if (typeof(TFrom) == typeof(byte) && typeof(TTo) == typeof(ushort)) + { + InvokeSpanIntoSpan_1to2(Rename(source), Rename(destination)); + return true; + } + + if (typeof(TFrom) == typeof(sbyte) && typeof(TTo) == typeof(short)) + { + InvokeSpanIntoSpan_1to2(Rename(source), Rename(destination)); + return true; + } + + if (typeof(TFrom) == typeof(ushort) && IsUInt32Like()) + { + InvokeSpanIntoSpan_1to2(Rename(source), Rename(destination)); + return true; + } + + if (typeof(TFrom) == typeof(short) && IsInt32Like()) + { + InvokeSpanIntoSpan_1to2(Rename(source), Rename(destination)); + return true; + } + + if (IsUInt32Like() && IsUInt64Like()) + { + InvokeSpanIntoSpan_1to2(Rename(source), Rename(destination)); + return true; + } + + if (IsInt32Like() && IsInt64Like()) + { + InvokeSpanIntoSpan_1to2(Rename(source), Rename(destination)); + return true; + } + + return false; + } + /// Computes the element-wise result of copying the sign from one number to another number in the specified tensors. /// The first tensor, represented as a span. /// The second tensor, represented as a span. @@ -963,15 +1208,14 @@ public static void Ieee754Remainder(T x, ReadOnlySpan y, Span destinati public static void ILogB(ReadOnlySpan x, Span destination) where T : IFloatingPointIeee754 { - if (x.Length > destination.Length) + if (typeof(T) == typeof(double)) { - ThrowHelper.ThrowArgument_DestinationTooShort(); + // Special-case double as the only vectorizable floating-point type whose size != sizeof(int). 
+ InvokeSpanIntoSpan_2to1(Rename(x), destination); } - - // TODO: Vectorize - for (int i = 0; i < x.Length; i++) + else { - destination[i] = T.ILogB(x[i]); + InvokeSpanIntoSpan>(x, destination); } } diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs index bb9285b59d1e2e..7cada40c624203 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs @@ -800,7 +800,7 @@ private static T CosineSimilarityCore(ReadOnlySpan x, ReadOnlySpan y) w /// private static T Aggregate( ReadOnlySpan x) - where TTransformOperator : struct, IUnaryOperator + where TTransformOperator : struct, IUnaryOperator where TAggregationOperator : struct, IAggregationOperator { // Since every branch has a cost and since that cost is @@ -999,61 +999,61 @@ static T Vectorized128(ref T xRef, nuint remainder) switch (blocks) { case 7: - { - Vector128 vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 7))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 6; - } + { + Vector128 vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 7))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 6; + } case 6: - { - Vector128 vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 6))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 5; - } + { + Vector128 vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 6))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 5; + } case 5: - { - Vector128 vector = 
TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 5))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 4; - } + { + Vector128 vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 5))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 4; + } case 4: - { - Vector128 vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 4))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 3; - } + { + Vector128 vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 4))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 3; + } case 3: - { - Vector128 vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 3))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 2; - } + { + Vector128 vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 3))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 2; + } case 2: - { - Vector128 vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 2))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 1; - } + { + Vector128 vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 2))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 1; + } case 1: - { - Vector128 vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 1))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 0; - } + { + Vector128 vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 1))); + vresult = TAggregationOperator.Invoke(vresult, 
vector); + goto case 0; + } case 0: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end = Vector128.ConditionalSelect(CreateRemainderMaskVector128((int)trailing), end, Vector128.Create(TAggregationOperator.IdentityValue)); - vresult = TAggregationOperator.Invoke(vresult, end); - break; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end = Vector128.ConditionalSelect(CreateRemainderMaskVector128((int)trailing), end, Vector128.Create(TAggregationOperator.IdentityValue)); + vresult = TAggregationOperator.Invoke(vresult, end); + break; + } } return TAggregationOperator.Invoke(vresult); @@ -1168,61 +1168,61 @@ static T Vectorized256(ref T xRef, nuint remainder) switch (blocks) { case 7: - { - Vector256 vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 7))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 6; - } + { + Vector256 vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 7))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 6; + } case 6: - { - Vector256 vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 6))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 5; - } + { + Vector256 vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 6))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 5; + } case 5: - { - Vector256 vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 5))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 4; - } + { + Vector256 vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 5))); + vresult = TAggregationOperator.Invoke(vresult, vector); + 
goto case 4; + } case 4: - { - Vector256 vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 4))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 3; - } + { + Vector256 vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 4))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 3; + } case 3: - { - Vector256 vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 3))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 2; - } + { + Vector256 vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 3))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 2; + } case 2: - { - Vector256 vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 2))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 1; - } + { + Vector256 vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 2))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 1; + } case 1: - { - Vector256 vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 1))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 0; - } + { + Vector256 vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 1))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 0; + } case 0: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end = Vector256.ConditionalSelect(CreateRemainderMaskVector256((int)trailing), end, Vector256.Create(TAggregationOperator.IdentityValue)); - vresult = TAggregationOperator.Invoke(vresult, end); - break; - } + { + // Store the last 
block, which includes any elements that wouldn't fill a full vector + end = Vector256.ConditionalSelect(CreateRemainderMaskVector256((int)trailing), end, Vector256.Create(TAggregationOperator.IdentityValue)); + vresult = TAggregationOperator.Invoke(vresult, end); + break; + } } return TAggregationOperator.Invoke(vresult); @@ -1337,61 +1337,61 @@ static T Vectorized512(ref T xRef, nuint remainder) switch (blocks) { case 7: - { - Vector512 vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 7))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 6; - } + { + Vector512 vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 7))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 6; + } case 6: - { - Vector512 vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 6))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 5; - } + { + Vector512 vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 6))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 5; + } case 5: - { - Vector512 vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 5))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 4; - } + { + Vector512 vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 5))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 4; + } case 4: - { - Vector512 vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 4))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 3; - } + { + Vector512 vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 4))); + vresult 
= TAggregationOperator.Invoke(vresult, vector); + goto case 3; + } case 3: - { - Vector512 vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 3))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 2; - } + { + Vector512 vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 3))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 2; + } case 2: - { - Vector512 vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 2))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 1; - } + { + Vector512 vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 2))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 1; + } case 1: - { - Vector512 vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 1))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 0; - } + { + Vector512 vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 1))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 0; + } case 0: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end = Vector512.ConditionalSelect(CreateRemainderMaskVector512((int)trailing), end, Vector512.Create(TAggregationOperator.IdentityValue)); - vresult = TAggregationOperator.Invoke(vresult, end); - break; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end = Vector512.ConditionalSelect(CreateRemainderMaskVector512((int)trailing), end, Vector512.Create(TAggregationOperator.IdentityValue)); + vresult = TAggregationOperator.Invoke(vresult, end); + break; + } } return TAggregationOperator.Invoke(vresult); @@ -1426,87 +1426,87 @@ static T 
VectorizedSmall4(ref T xRef, nuint remainder) case 11: case 10: case 9: - { - Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); + { + Debug.Assert(Vector256.IsHardwareAccelerated); + Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); - Vector256 beg = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); - Vector256 end = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count)); + Vector256 beg = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); + Vector256 end = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count)); - end = Vector256.ConditionalSelect(CreateRemainderMaskVector256((int)(remainder % (uint)Vector256.Count)), end, Vector256.Create(TAggregationOperator.IdentityValue)); + end = Vector256.ConditionalSelect(CreateRemainderMaskVector256((int)(remainder % (uint)Vector256.Count)), end, Vector256.Create(TAggregationOperator.IdentityValue)); - vresult = TAggregationOperator.Invoke(vresult, beg); - vresult = TAggregationOperator.Invoke(vresult, end); + vresult = TAggregationOperator.Invoke(vresult, beg); + vresult = TAggregationOperator.Invoke(vresult, end); - result = TAggregationOperator.Invoke(vresult); - break; - } + result = TAggregationOperator.Invoke(vresult); + break; + } case 8: - { - Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); + { + Debug.Assert(Vector256.IsHardwareAccelerated); + Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); - Vector256 beg = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); - vresult = TAggregationOperator.Invoke(vresult, beg); + Vector256 beg = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); + vresult = TAggregationOperator.Invoke(vresult, beg); - result = TAggregationOperator.Invoke(vresult); - break; - } + result = 
TAggregationOperator.Invoke(vresult); + break; + } case 7: case 6: case 5: - { - Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); + { + Debug.Assert(Vector128.IsHardwareAccelerated); + Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); - Vector128 beg = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); - Vector128 end = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count)); + Vector128 beg = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); + Vector128 end = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count)); - end = Vector128.ConditionalSelect(CreateRemainderMaskVector128((int)(remainder % (uint)Vector128.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue)); + end = Vector128.ConditionalSelect(CreateRemainderMaskVector128((int)(remainder % (uint)Vector128.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue)); - vresult = TAggregationOperator.Invoke(vresult, beg); - vresult = TAggregationOperator.Invoke(vresult, end); + vresult = TAggregationOperator.Invoke(vresult, beg); + vresult = TAggregationOperator.Invoke(vresult, end); - result = TAggregationOperator.Invoke(vresult); - break; - } + result = TAggregationOperator.Invoke(vresult); + break; + } case 4: - { - Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); + { + Debug.Assert(Vector128.IsHardwareAccelerated); + Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); - Vector128 beg = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); - vresult = TAggregationOperator.Invoke(vresult, beg); + Vector128 beg = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); + vresult = TAggregationOperator.Invoke(vresult, beg); - result = TAggregationOperator.Invoke(vresult); - break; - } + result = 
TAggregationOperator.Invoke(vresult); + break; + } case 3: - { - result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 2))); - goto case 2; - } + { + result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 2))); + goto case 2; + } case 2: - { - result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 1))); - goto case 1; - } + { + result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 1))); + goto case 1; + } case 1: - { - result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(xRef)); - goto case 0; - } + { + result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(xRef)); + goto case 0; + } case 0: - { - break; - } + { + break; + } } return result; @@ -1523,73 +1523,73 @@ static T VectorizedSmall8(ref T xRef, nuint remainder) case 7: case 6: case 5: - { - Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); + { + Debug.Assert(Vector256.IsHardwareAccelerated); + Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); - Vector256 beg = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); - Vector256 end = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count)); + Vector256 beg = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); + Vector256 end = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count)); - end = Vector256.ConditionalSelect(CreateRemainderMaskVector256((int)(remainder % (uint)Vector256.Count)), end, Vector256.Create(TAggregationOperator.IdentityValue)); + end = Vector256.ConditionalSelect(CreateRemainderMaskVector256((int)(remainder % (uint)Vector256.Count)), end, Vector256.Create(TAggregationOperator.IdentityValue)); - vresult = TAggregationOperator.Invoke(vresult, beg); - vresult = 
TAggregationOperator.Invoke(vresult, end); + vresult = TAggregationOperator.Invoke(vresult, beg); + vresult = TAggregationOperator.Invoke(vresult, end); - result = TAggregationOperator.Invoke(vresult); - break; - } + result = TAggregationOperator.Invoke(vresult); + break; + } case 4: - { - Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); + { + Debug.Assert(Vector256.IsHardwareAccelerated); + Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); - Vector256 beg = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); - vresult = TAggregationOperator.Invoke(vresult, beg); + Vector256 beg = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); + vresult = TAggregationOperator.Invoke(vresult, beg); - result = TAggregationOperator.Invoke(vresult); - break; - } + result = TAggregationOperator.Invoke(vresult); + break; + } case 3: - { - Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); + { + Debug.Assert(Vector128.IsHardwareAccelerated); + Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); - Vector128 beg = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); - Vector128 end = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count)); + Vector128 beg = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); + Vector128 end = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count)); - end = Vector128.ConditionalSelect(CreateRemainderMaskVector128((int)(remainder % (uint)Vector128.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue)); + end = Vector128.ConditionalSelect(CreateRemainderMaskVector128((int)(remainder % (uint)Vector128.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue)); - vresult = TAggregationOperator.Invoke(vresult, beg); - vresult = 
TAggregationOperator.Invoke(vresult, end); + vresult = TAggregationOperator.Invoke(vresult, beg); + vresult = TAggregationOperator.Invoke(vresult, end); - result = TAggregationOperator.Invoke(vresult); - break; - } + result = TAggregationOperator.Invoke(vresult); + break; + } case 2: - { - Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); + { + Debug.Assert(Vector128.IsHardwareAccelerated); + Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); - Vector128 beg = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); - vresult = TAggregationOperator.Invoke(vresult, beg); + Vector128 beg = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); + vresult = TAggregationOperator.Invoke(vresult, beg); - result = TAggregationOperator.Invoke(vresult); - break; - } + result = TAggregationOperator.Invoke(vresult); + break; + } case 1: - { - result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(xRef)); - goto case 0; - } + { + result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(xRef)); + goto case 0; + } case 0: - { - break; - } + { + break; + } } return result; @@ -1826,68 +1826,68 @@ static T Vectorized128(ref T xRef, ref T yRef, nuint remainder) switch (blocks) { case 7: - { - Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 7)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 7))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 6; - } + { + Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 7)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 7))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 6; + } case 6: - { - Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 6)), - 
Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 6))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 5; - } + { + Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 6)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 6))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 5; + } case 5: - { - Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 5)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 5))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 4; - } + { + Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 5)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 5))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 4; + } case 4: - { - Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 4)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 4))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 3; - } + { + Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 4)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 4))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 3; + } case 3: - { - Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 3)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 3))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 2; - } + { + Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 3)), + Vector128.LoadUnsafe(ref yRef, remainder - 
(uint)(Vector128.Count * 3))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 2; + } case 2: - { - Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 2)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 2))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 1; - } + { + Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 2)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 2))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 1; + } case 1: - { - Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 1)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 1))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 0; - } + { + Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 1)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 1))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 0; + } case 0: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end = Vector128.ConditionalSelect(CreateRemainderMaskVector128((int)trailing), end, Vector128.Create(TAggregationOperator.IdentityValue)); - vresult = TAggregationOperator.Invoke(vresult, end); - break; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end = Vector128.ConditionalSelect(CreateRemainderMaskVector128((int)trailing), end, Vector128.Create(TAggregationOperator.IdentityValue)); + vresult = TAggregationOperator.Invoke(vresult, end); + break; + } } return TAggregationOperator.Invoke(vresult); @@ -2017,68 +2017,68 @@ static T Vectorized256(ref T xRef, ref T yRef, nuint remainder) switch (blocks) { case 7: - { - 
Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 7)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 7))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 6; - } + { + Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 7)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 7))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 6; + } case 6: - { - Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 6)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 6))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 5; - } + { + Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 6)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 6))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 5; + } case 5: - { - Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 5)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 5))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 4; - } + { + Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 5)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 5))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 4; + } case 4: - { - Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 4)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 4))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 3; - } + { + Vector256 vector = 
TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 4)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 4))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 3; + } case 3: - { - Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 3)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 3))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 2; - } + { + Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 3)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 3))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 2; + } case 2: - { - Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 2)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 2))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 1; - } + { + Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 2)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 2))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 1; + } case 1: - { - Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 1)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 1))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 0; - } + { + Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 1)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 1))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 0; + } case 0: - { - // Store the last block, which includes any elements that wouldn't 
fill a full vector - end = Vector256.ConditionalSelect(CreateRemainderMaskVector256((int)trailing), end, Vector256.Create(TAggregationOperator.IdentityValue)); - vresult = TAggregationOperator.Invoke(vresult, end); - break; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end = Vector256.ConditionalSelect(CreateRemainderMaskVector256((int)trailing), end, Vector256.Create(TAggregationOperator.IdentityValue)); + vresult = TAggregationOperator.Invoke(vresult, end); + break; + } } return TAggregationOperator.Invoke(vresult); @@ -2208,68 +2208,68 @@ static T Vectorized512(ref T xRef, ref T yRef, nuint remainder) switch (blocks) { case 7: - { - Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 7)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 7))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 6; - } + { + Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 7)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 7))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 6; + } case 6: - { - Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 6)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 6))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 5; - } + { + Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 6)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 6))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 5; + } case 5: - { - Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 5)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 5))); 
- vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 4; - } + { + Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 5)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 5))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 4; + } case 4: - { - Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 4)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 4))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 3; - } + { + Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 4)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 4))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 3; + } case 3: - { - Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 3)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 3))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 2; - } + { + Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 3)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 3))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 2; + } case 2: - { - Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 2)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 2))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 1; - } + { + Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 2)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 2))); + vresult = TAggregationOperator.Invoke(vresult, vector); 
+ goto case 1; + } case 1: - { - Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 1)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 1))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 0; - } + { + Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 1)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 1))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 0; + } case 0: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end = Vector512.ConditionalSelect(CreateRemainderMaskVector512((int)trailing), end, Vector512.Create(TAggregationOperator.IdentityValue)); - vresult = TAggregationOperator.Invoke(vresult, end); - break; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end = Vector512.ConditionalSelect(CreateRemainderMaskVector512((int)trailing), end, Vector512.Create(TAggregationOperator.IdentityValue)); + vresult = TAggregationOperator.Invoke(vresult, end); + break; + } } return TAggregationOperator.Invoke(vresult); @@ -2304,95 +2304,95 @@ static T VectorizedSmall4(ref T xRef, ref T yRef, nuint remainder) case 11: case 10: case 9: - { - Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); + { + Debug.Assert(Vector256.IsHardwareAccelerated); + Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); - Vector256 beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - Vector256.LoadUnsafe(ref yRef)); - Vector256 end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256.Count)); + Vector256 beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref 
yRef)); + Vector256 end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256.Count)); - end = Vector256.ConditionalSelect(CreateRemainderMaskVector256((int)(remainder % (uint)Vector256.Count)), end, Vector256.Create(TAggregationOperator.IdentityValue)); + end = Vector256.ConditionalSelect(CreateRemainderMaskVector256((int)(remainder % (uint)Vector256.Count)), end, Vector256.Create(TAggregationOperator.IdentityValue)); - vresult = TAggregationOperator.Invoke(vresult, beg); - vresult = TAggregationOperator.Invoke(vresult, end); + vresult = TAggregationOperator.Invoke(vresult, beg); + vresult = TAggregationOperator.Invoke(vresult, end); - result = TAggregationOperator.Invoke(vresult); - break; - } + result = TAggregationOperator.Invoke(vresult); + break; + } case 8: - { - Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); + { + Debug.Assert(Vector256.IsHardwareAccelerated); + Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); - Vector256 beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - Vector256.LoadUnsafe(ref yRef)); - vresult = TAggregationOperator.Invoke(vresult, beg); + Vector256 beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef)); + vresult = TAggregationOperator.Invoke(vresult, beg); - result = TAggregationOperator.Invoke(vresult); - break; - } + result = TAggregationOperator.Invoke(vresult); + break; + } case 7: case 6: case 5: - { - Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); + { + Debug.Assert(Vector128.IsHardwareAccelerated); + Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); - Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef)); - Vector128 end = 
TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count)); + Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef)); + Vector128 end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count)); - end = Vector128.ConditionalSelect(CreateRemainderMaskVector128((int)(remainder % (uint)Vector128.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue)); + end = Vector128.ConditionalSelect(CreateRemainderMaskVector128((int)(remainder % (uint)Vector128.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue)); - vresult = TAggregationOperator.Invoke(vresult, beg); - vresult = TAggregationOperator.Invoke(vresult, end); + vresult = TAggregationOperator.Invoke(vresult, beg); + vresult = TAggregationOperator.Invoke(vresult, end); - result = TAggregationOperator.Invoke(vresult); - break; - } + result = TAggregationOperator.Invoke(vresult); + break; + } case 4: - { - Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); + { + Debug.Assert(Vector128.IsHardwareAccelerated); + Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); - Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef)); - vresult = TAggregationOperator.Invoke(vresult, beg); + Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef)); + vresult = TAggregationOperator.Invoke(vresult, beg); - result = TAggregationOperator.Invoke(vresult); - break; - } + result = TAggregationOperator.Invoke(vresult); + break; + } case 3: - { - result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 2), - Unsafe.Add(ref yRef, 2))); - goto case 2; - } + 
{ + result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 2), + Unsafe.Add(ref yRef, 2))); + goto case 2; + } case 2: - { - result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 1), - Unsafe.Add(ref yRef, 1))); - goto case 1; - } + { + result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 1), + Unsafe.Add(ref yRef, 1))); + goto case 1; + } case 1: - { - result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(xRef, yRef)); - goto case 0; - } + { + result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(xRef, yRef)); + goto case 0; + } case 0: - { - break; - } + { + break; + } } return result; @@ -2409,79 +2409,79 @@ static T VectorizedSmall8(ref T xRef, ref T yRef, nuint remainder) case 7: case 6: case 5: - { - Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); + { + Debug.Assert(Vector256.IsHardwareAccelerated); + Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); - Vector256 beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - Vector256.LoadUnsafe(ref yRef)); - Vector256 end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256.Count)); + Vector256 beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef)); + Vector256 end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256.Count)); - end = Vector256.ConditionalSelect(CreateRemainderMaskVector256((int)(remainder % (uint)Vector256.Count)), end, Vector256.Create(TAggregationOperator.IdentityValue)); + end = Vector256.ConditionalSelect(CreateRemainderMaskVector256((int)(remainder % (uint)Vector256.Count)), end, 
Vector256.Create(TAggregationOperator.IdentityValue)); - vresult = TAggregationOperator.Invoke(vresult, beg); - vresult = TAggregationOperator.Invoke(vresult, end); + vresult = TAggregationOperator.Invoke(vresult, beg); + vresult = TAggregationOperator.Invoke(vresult, end); - result = TAggregationOperator.Invoke(vresult); - break; - } + result = TAggregationOperator.Invoke(vresult); + break; + } case 4: - { - Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); + { + Debug.Assert(Vector256.IsHardwareAccelerated); + Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); - Vector256 beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - Vector256.LoadUnsafe(ref yRef)); - vresult = TAggregationOperator.Invoke(vresult, beg); + Vector256 beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef)); + vresult = TAggregationOperator.Invoke(vresult, beg); - result = TAggregationOperator.Invoke(vresult); - break; - } + result = TAggregationOperator.Invoke(vresult); + break; + } case 3: - { - Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); + { + Debug.Assert(Vector128.IsHardwareAccelerated); + Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); - Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef)); - Vector128 end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count)); + Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef)); + Vector128 end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count)); - end = 
Vector128.ConditionalSelect(CreateRemainderMaskVector128((int)(remainder % (uint)Vector128.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue)); + end = Vector128.ConditionalSelect(CreateRemainderMaskVector128((int)(remainder % (uint)Vector128.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue)); - vresult = TAggregationOperator.Invoke(vresult, beg); - vresult = TAggregationOperator.Invoke(vresult, end); + vresult = TAggregationOperator.Invoke(vresult, beg); + vresult = TAggregationOperator.Invoke(vresult, end); - result = TAggregationOperator.Invoke(vresult); - break; - } + result = TAggregationOperator.Invoke(vresult); + break; + } case 2: - { - Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); + { + Debug.Assert(Vector128.IsHardwareAccelerated); + Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); - Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef)); - vresult = TAggregationOperator.Invoke(vresult, beg); + Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef)); + vresult = TAggregationOperator.Invoke(vresult, beg); - result = TAggregationOperator.Invoke(vresult); - break; - } + result = TAggregationOperator.Invoke(vresult); + break; + } case 1: - { - result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(xRef, yRef)); - goto case 0; - } + { + result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(xRef, yRef)); + goto case 0; + } case 0: - { - break; - } + { + break; + } } return result; @@ -3021,32 +3021,48 @@ private static int IndexOfFirstMatch(Vector512 mask) => BitOperations.TrailingZeroCount(mask.ExtractMostSignificantBits()); /// Performs an element-wise operation on and writes the results to . - /// The element type. + /// The element input type. 
/// Specifies the operation to perform on each element loaded from . private static void InvokeSpanIntoSpan( ReadOnlySpan x, Span destination) - where TUnaryOperator : struct, IUnaryOperator + where TUnaryOperator : struct, IUnaryOperator => + InvokeSpanIntoSpan(x, destination); + + /// Performs an element-wise operation on and writes the results to . + /// The element input type. + /// The element output type. Must be the same size as TInput if TInput and TOutput both support vectorization. + /// Specifies the operation to perform on each element loaded from . + /// + /// This supports vectorizing the operation if and are the same size. + /// Otherwise, it'll fall back to scalar operations. + /// + private static void InvokeSpanIntoSpan( + ReadOnlySpan x, Span destination) + where TUnaryOperator : struct, IUnaryOperator { if (x.Length > destination.Length) { ThrowHelper.ThrowArgument_DestinationTooShort(); } - ValidateInputOutputSpanNonOverlapping(x, destination); + if (typeof(TInput) == typeof(TOutput)) + { + ValidateInputOutputSpanNonOverlapping(x, Rename(destination)); + } // Since every branch has a cost and since that cost is // essentially lost for larger inputs, we do branches // in a way that allows us to have the minimum possible // for small sizes - ref T xRef = ref MemoryMarshal.GetReference(x); - ref T dRef = ref MemoryMarshal.GetReference(destination); + ref TInput xRef = ref MemoryMarshal.GetReference(x); + ref TOutput dRef = ref MemoryMarshal.GetReference(destination); nuint remainder = (uint)x.Length; - if (Vector512.IsHardwareAccelerated && Vector512.IsSupported && TUnaryOperator.Vectorizable && Unsafe.SizeOf() >= 4) + if (Vector512.IsHardwareAccelerated && Vector512.IsSupported && Vector512.IsSupported && TUnaryOperator.Vectorizable && Unsafe.SizeOf() >= 4 && Unsafe.SizeOf() == Unsafe.SizeOf()) { - if (remainder >= (uint)Vector512.Count) + if (remainder >= (uint)Vector512.Count) { Vectorized512(ref xRef, ref dRef, remainder); } @@ -3062,9 +3078,9 
@@ private static void InvokeSpanIntoSpan( return; } - if (Vector256.IsHardwareAccelerated && Vector256.IsSupported && TUnaryOperator.Vectorizable && Unsafe.SizeOf() >= 4) + if (Vector256.IsHardwareAccelerated && Vector256.IsSupported && Vector256.IsSupported && TUnaryOperator.Vectorizable && Unsafe.SizeOf() >= 4 && Unsafe.SizeOf() == Unsafe.SizeOf()) { - if (remainder >= (uint)Vector256.Count) + if (remainder >= (uint)Vector256.Count) { Vectorized256(ref xRef, ref dRef, remainder); } @@ -3080,9 +3096,9 @@ private static void InvokeSpanIntoSpan( return; } - if (Vector128.IsHardwareAccelerated && Vector128.IsSupported && TUnaryOperator.Vectorizable && Unsafe.SizeOf() >= 4) + if (Vector128.IsHardwareAccelerated && Vector128.IsSupported && Vector128.IsSupported && TUnaryOperator.Vectorizable && Unsafe.SizeOf() >= 4 && Unsafe.SizeOf() == Unsafe.SizeOf()) { - if (remainder >= (uint)Vector128.Count) + if (remainder >= (uint)Vector128.Count) { Vectorized128(ref xRef, ref dRef, remainder); } @@ -3104,7 +3120,7 @@ private static void InvokeSpanIntoSpan( SoftwareFallback(ref xRef, ref dRef, remainder); [MethodImpl(MethodImplOptions.AggressiveInlining)] - static void SoftwareFallback(ref T xRef, ref T dRef, nuint length) + static void SoftwareFallback(ref TInput xRef, ref TOutput dRef, nuint length) { for (nuint i = 0; i < length; i++) { @@ -3112,31 +3128,31 @@ static void SoftwareFallback(ref T xRef, ref T dRef, nuint length) } } - static void Vectorized128(ref T xRef, ref T dRef, nuint remainder) + static void Vectorized128(ref TInput xRef, ref TOutput dRef, nuint remainder) { - ref T dRefBeg = ref dRef; + ref TOutput dRefBeg = ref dRef; // Preload the beginning and end so that overlapping accesses don't negatively impact the data - Vector128 beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); - Vector128 end = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count)); + Vector128 beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref 
xRef)); + Vector128 end = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count)); - if (remainder > (uint)(Vector128.Count * 8)) + if (remainder > (uint)(Vector128.Count * 8)) { // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted. - fixed (T* px = &xRef) - fixed (T* pd = &dRef) + fixed (TInput* px = &xRef) + fixed (TOutput* pd = &dRef) { - T* xPtr = px; - T* dPtr = pd; + TInput* xPtr = px; + TOutput* dPtr = pd; // We need to the ensure the underlying data can be aligned and only align // it if it can. It is possible we have an unaligned ref, in which case we // can never achieve the required SIMD alignment. - bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0; + bool canAlign = ((nuint)dPtr % (nuint)sizeof(TInput)) == 0; if (canAlign) { @@ -3146,96 +3162,96 @@ static void Vectorized128(ref T xRef, ref T dRef, nuint remainder) // are more expensive than unaligned loads and aligning both is significantly more // complex. - nuint misalignment = ((uint)sizeof(Vector128) - ((nuint)dPtr % (uint)sizeof(Vector128))) / (uint)sizeof(T); + nuint misalignment = ((uint)sizeof(Vector128) - ((nuint)dPtr % (uint)sizeof(Vector128))) / (uint)sizeof(TInput); xPtr += misalignment; dPtr += misalignment; - Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector128)) == 0); + Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector128)) == 0); remainder -= misalignment; } - Vector128 vector1; - Vector128 vector2; - Vector128 vector3; - Vector128 vector4; + Vector128 vector1; + Vector128 vector2; + Vector128 vector3; + Vector128 vector4; - if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign) + if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(TInput))) && canAlign) { // This loop stores the data non-temporally, which benefits us when there // is a large amount of data involved as it avoids polluting the cache. 
- while (remainder >= (uint)(Vector128.Count * 8)) + while (remainder >= (uint)(Vector128.Count * 8)) { // We load, process, and store the first four vectors - vector1 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128.Count * 0))); - vector2 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128.Count * 1))); - vector3 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128.Count * 2))); - vector4 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128.Count * 3))); + vector1 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128.Count * 0))); + vector2 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128.Count * 1))); + vector3 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128.Count * 2))); + vector4 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128.Count * 3))); - vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128.Count * 0)); - vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128.Count * 1)); - vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128.Count * 2)); - vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128.Count * 3)); + vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128.Count * 0)); + vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128.Count * 1)); + vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128.Count * 2)); + vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128.Count * 3)); // We load, process, and store the next four vectors - vector1 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128.Count * 4))); - vector2 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128.Count * 5))); - vector3 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128.Count * 6))); - vector4 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128.Count * 7))); + vector1 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128.Count * 4))); + vector2 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128.Count * 5))); + 
vector3 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128.Count * 6))); + vector4 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128.Count * 7))); - vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128.Count * 4)); - vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128.Count * 5)); - vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128.Count * 6)); - vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128.Count * 7)); + vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector128.Count * 4)); + vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector128.Count * 5)); + vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector128.Count * 6)); + vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector128.Count * 7)); // We adjust the source and destination references, then update // the count of remaining elements to process. - xPtr += (uint)(Vector128.Count * 8); - dPtr += (uint)(Vector128.Count * 8); + xPtr += (uint)(Vector128.Count * 8); + dPtr += (uint)(Vector128.Count * 8); - remainder -= (uint)(Vector128.Count * 8); + remainder -= (uint)(Vector128.Count * 8); } } else { - while (remainder >= (uint)(Vector128.Count * 8)) + while (remainder >= (uint)(Vector128.Count * 8)) { // We load, process, and store the first four vectors - vector1 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128.Count * 0))); - vector2 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128.Count * 1))); - vector3 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128.Count * 2))); - vector4 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128.Count * 3))); + vector1 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128.Count * 0))); + vector2 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128.Count * 1))); + vector3 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128.Count * 2))); + vector4 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128.Count * 3))); - vector1.Store(dPtr + 
(uint)(Vector128.Count * 0)); - vector2.Store(dPtr + (uint)(Vector128.Count * 1)); - vector3.Store(dPtr + (uint)(Vector128.Count * 2)); - vector4.Store(dPtr + (uint)(Vector128.Count * 3)); + vector1.Store(dPtr + (uint)(Vector128.Count * 0)); + vector2.Store(dPtr + (uint)(Vector128.Count * 1)); + vector3.Store(dPtr + (uint)(Vector128.Count * 2)); + vector4.Store(dPtr + (uint)(Vector128.Count * 3)); // We load, process, and store the next four vectors - vector1 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128.Count * 4))); - vector2 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128.Count * 5))); - vector3 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128.Count * 6))); - vector4 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128.Count * 7))); + vector1 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128.Count * 4))); + vector2 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128.Count * 5))); + vector3 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128.Count * 6))); + vector4 = TUnaryOperator.Invoke(Vector128.Load(xPtr + (uint)(Vector128.Count * 7))); - vector1.Store(dPtr + (uint)(Vector128.Count * 4)); - vector2.Store(dPtr + (uint)(Vector128.Count * 5)); - vector3.Store(dPtr + (uint)(Vector128.Count * 6)); - vector4.Store(dPtr + (uint)(Vector128.Count * 7)); + vector1.Store(dPtr + (uint)(Vector128.Count * 4)); + vector2.Store(dPtr + (uint)(Vector128.Count * 5)); + vector3.Store(dPtr + (uint)(Vector128.Count * 6)); + vector4.Store(dPtr + (uint)(Vector128.Count * 7)); // We adjust the source and destination references, then update // the count of remaining elements to process. 
- xPtr += (uint)(Vector128.Count * 8); - dPtr += (uint)(Vector128.Count * 8); + xPtr += (uint)(Vector128.Count * 8); + dPtr += (uint)(Vector128.Count * 8); - remainder -= (uint)(Vector128.Count * 8); + remainder -= (uint)(Vector128.Count * 8); } } @@ -3254,100 +3270,100 @@ static void Vectorized128(ref T xRef, ref T dRef, nuint remainder) // data before the first aligned address. nuint endIndex = remainder; - remainder = (remainder + (uint)(Vector128.Count - 1)) & (nuint)(-Vector128.Count); + remainder = (remainder + (uint)(Vector128.Count - 1)) & (nuint)(-Vector128.Count); - switch (remainder / (uint)Vector128.Count) + switch (remainder / (uint)Vector128.Count) { case 8: - { - Vector128 vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 8))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 8)); - goto case 7; - } + { + Vector128 vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 8))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 8)); + goto case 7; + } case 7: - { - Vector128 vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 7))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 7)); - goto case 6; - } + { + Vector128 vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 7))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 7)); + goto case 6; + } case 6: - { - Vector128 vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 6))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 6)); - goto case 5; - } + { + Vector128 vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 6))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 6)); + goto case 5; + } case 5: - { - Vector128 
vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 5))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 5)); - goto case 4; - } + { + Vector128 vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 5))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 5)); + goto case 4; + } case 4: - { - Vector128 vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 4))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 4)); - goto case 3; - } + { + Vector128 vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 4))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 4)); + goto case 3; + } case 3: - { - Vector128 vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 3))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 3)); - goto case 2; - } + { + Vector128 vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 3))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 3)); + goto case 2; + } case 2: - { - Vector128 vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 2))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 2)); - goto case 1; - } + { + Vector128 vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 2))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 2)); + goto case 1; + } case 1: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128.Count); - goto case 0; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + 
end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128.Count); + goto case 0; + } case 0: - { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); - break; - } + { + // Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } } } - static void Vectorized256(ref T xRef, ref T dRef, nuint remainder) + static void Vectorized256(ref TInput xRef, ref TOutput dRef, nuint remainder) { - ref T dRefBeg = ref dRef; + ref TOutput dRefBeg = ref dRef; // Preload the beginning and end so that overlapping accesses don't negatively impact the data - Vector256 beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); - Vector256 end = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count)); + Vector256 beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); + Vector256 end = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count)); - if (remainder > (uint)(Vector256.Count * 8)) + if (remainder > (uint)(Vector256.Count * 8)) { // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted. - fixed (T* px = &xRef) - fixed (T* pd = &dRef) + fixed (TInput* px = &xRef) + fixed (TOutput* pd = &dRef) { - T* xPtr = px; - T* dPtr = pd; + TInput* xPtr = px; + TOutput* dPtr = pd; // We need to the ensure the underlying data can be aligned and only align // it if it can. It is possible we have an unaligned ref, in which case we // can never achieve the required SIMD alignment. 
- bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0; + bool canAlign = ((nuint)dPtr % (nuint)sizeof(TInput)) == 0; if (canAlign) { @@ -3357,96 +3373,96 @@ static void Vectorized256(ref T xRef, ref T dRef, nuint remainder) // are more expensive than unaligned loads and aligning both is significantly more // complex. - nuint misalignment = ((uint)sizeof(Vector256) - ((nuint)dPtr % (uint)sizeof(Vector256))) / (uint)sizeof(T); + nuint misalignment = ((uint)sizeof(Vector256) - ((nuint)dPtr % (uint)sizeof(Vector256))) / (uint)sizeof(TInput); xPtr += misalignment; dPtr += misalignment; - Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector256)) == 0); + Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector256)) == 0); remainder -= misalignment; } - Vector256 vector1; - Vector256 vector2; - Vector256 vector3; - Vector256 vector4; + Vector256 vector1; + Vector256 vector2; + Vector256 vector3; + Vector256 vector4; - if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign) + if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(TInput))) && canAlign) { // This loop stores the data non-temporally, which benefits us when there // is a large amount of data involved as it avoids polluting the cache. 
- while (remainder >= (uint)(Vector256.Count * 8)) + while (remainder >= (uint)(Vector256.Count * 8)) { // We load, process, and store the first four vectors - vector1 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256.Count * 0))); - vector2 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256.Count * 1))); - vector3 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256.Count * 2))); - vector4 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256.Count * 3))); + vector1 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256.Count * 0))); + vector2 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256.Count * 1))); + vector3 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256.Count * 2))); + vector4 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256.Count * 3))); - vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector256.Count * 0)); - vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector256.Count * 1)); - vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector256.Count * 2)); - vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector256.Count * 3)); + vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector256.Count * 0)); + vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector256.Count * 1)); + vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector256.Count * 2)); + vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector256.Count * 3)); // We load, process, and store the next four vectors - vector1 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256.Count * 4))); - vector2 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256.Count * 5))); - vector3 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256.Count * 6))); - vector4 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256.Count * 7))); + vector1 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256.Count * 4))); + vector2 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256.Count * 5))); + 
vector3 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256.Count * 6))); + vector4 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256.Count * 7))); - vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector256.Count * 4)); - vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector256.Count * 5)); - vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector256.Count * 6)); - vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector256.Count * 7)); + vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector256.Count * 4)); + vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector256.Count * 5)); + vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector256.Count * 6)); + vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector256.Count * 7)); // We adjust the source and destination references, then update // the count of remaining elements to process. - xPtr += (uint)(Vector256.Count * 8); - dPtr += (uint)(Vector256.Count * 8); + xPtr += (uint)(Vector256.Count * 8); + dPtr += (uint)(Vector256.Count * 8); - remainder -= (uint)(Vector256.Count * 8); + remainder -= (uint)(Vector256.Count * 8); } } else { - while (remainder >= (uint)(Vector256.Count * 8)) + while (remainder >= (uint)(Vector256.Count * 8)) { // We load, process, and store the first four vectors - vector1 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256.Count * 0))); - vector2 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256.Count * 1))); - vector3 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256.Count * 2))); - vector4 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256.Count * 3))); + vector1 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256.Count * 0))); + vector2 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256.Count * 1))); + vector3 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256.Count * 2))); + vector4 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256.Count * 3))); - vector1.Store(dPtr + 
(uint)(Vector256.Count * 0)); - vector2.Store(dPtr + (uint)(Vector256.Count * 1)); - vector3.Store(dPtr + (uint)(Vector256.Count * 2)); - vector4.Store(dPtr + (uint)(Vector256.Count * 3)); + vector1.Store(dPtr + (uint)(Vector256.Count * 0)); + vector2.Store(dPtr + (uint)(Vector256.Count * 1)); + vector3.Store(dPtr + (uint)(Vector256.Count * 2)); + vector4.Store(dPtr + (uint)(Vector256.Count * 3)); // We load, process, and store the next four vectors - vector1 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256.Count * 4))); - vector2 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256.Count * 5))); - vector3 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256.Count * 6))); - vector4 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256.Count * 7))); + vector1 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256.Count * 4))); + vector2 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256.Count * 5))); + vector3 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256.Count * 6))); + vector4 = TUnaryOperator.Invoke(Vector256.Load(xPtr + (uint)(Vector256.Count * 7))); - vector1.Store(dPtr + (uint)(Vector256.Count * 4)); - vector2.Store(dPtr + (uint)(Vector256.Count * 5)); - vector3.Store(dPtr + (uint)(Vector256.Count * 6)); - vector4.Store(dPtr + (uint)(Vector256.Count * 7)); + vector1.Store(dPtr + (uint)(Vector256.Count * 4)); + vector2.Store(dPtr + (uint)(Vector256.Count * 5)); + vector3.Store(dPtr + (uint)(Vector256.Count * 6)); + vector4.Store(dPtr + (uint)(Vector256.Count * 7)); // We adjust the source and destination references, then update // the count of remaining elements to process. 
- xPtr += (uint)(Vector256.Count * 8); - dPtr += (uint)(Vector256.Count * 8); + xPtr += (uint)(Vector256.Count * 8); + dPtr += (uint)(Vector256.Count * 8); - remainder -= (uint)(Vector256.Count * 8); + remainder -= (uint)(Vector256.Count * 8); } } @@ -3465,100 +3481,100 @@ static void Vectorized256(ref T xRef, ref T dRef, nuint remainder) // data before the first aligned address. nuint endIndex = remainder; - remainder = (remainder + (uint)(Vector256.Count - 1)) & (nuint)(-Vector256.Count); + remainder = (remainder + (uint)(Vector256.Count - 1)) & (nuint)(-Vector256.Count); - switch (remainder / (uint)Vector256.Count) + switch (remainder / (uint)Vector256.Count) { case 8: - { - Vector256 vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 8))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 8)); - goto case 7; - } + { + Vector256 vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 8))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 8)); + goto case 7; + } case 7: - { - Vector256 vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 7))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 7)); - goto case 6; - } + { + Vector256 vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 7))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 7)); + goto case 6; + } case 6: - { - Vector256 vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 6))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 6)); - goto case 5; - } + { + Vector256 vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 6))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 6)); + goto case 5; + } case 5: - { - Vector256 
vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 5))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 5)); - goto case 4; - } + { + Vector256 vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 5))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 5)); + goto case 4; + } case 4: - { - Vector256 vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 4))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 4)); - goto case 3; - } + { + Vector256 vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 4))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 4)); + goto case 3; + } case 3: - { - Vector256 vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 3))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 3)); - goto case 2; - } + { + Vector256 vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 3))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 3)); + goto case 2; + } case 2: - { - Vector256 vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 2))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 2)); - goto case 1; - } + { + Vector256 vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 2))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 2)); + goto case 1; + } case 1: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector256.Count); - goto case 0; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + 
end.StoreUnsafe(ref dRef, endIndex - (uint)Vector256.Count); + goto case 0; + } case 0: - { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); - break; - } + { + // Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } } } - static void Vectorized512(ref T xRef, ref T dRef, nuint remainder) + static void Vectorized512(ref TInput xRef, ref TOutput dRef, nuint remainder) { - ref T dRefBeg = ref dRef; + ref TOutput dRefBeg = ref dRef; // Preload the beginning and end so that overlapping accesses don't negatively impact the data - Vector512 beg = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef)); - Vector512 end = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512.Count)); + Vector512 beg = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef)); + Vector512 end = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)Vector512.Count)); - if (remainder > (uint)(Vector512.Count * 8)) + if (remainder > (uint)(Vector512.Count * 8)) { // Pinning is cheap and will be short lived for small inputs and unlikely to be impactful // for large inputs (> 85KB) which are on the LOH and unlikely to be compacted. - fixed (T* px = &xRef) - fixed (T* pd = &dRef) + fixed (TInput* px = &xRef) + fixed (TOutput* pd = &dRef) { - T* xPtr = px; - T* dPtr = pd; + TInput* xPtr = px; + TOutput* dPtr = pd; // We need to the ensure the underlying data can be aligned and only align // it if it can. It is possible we have an unaligned ref, in which case we // can never achieve the required SIMD alignment. 
- bool canAlign = ((nuint)dPtr % (nuint)sizeof(T)) == 0; + bool canAlign = ((nuint)dPtr % (nuint)sizeof(TInput)) == 0; if (canAlign) { @@ -3568,96 +3584,96 @@ static void Vectorized512(ref T xRef, ref T dRef, nuint remainder) // are more expensive than unaligned loads and aligning both is significantly more // complex. - nuint misalignment = ((uint)sizeof(Vector512) - ((nuint)dPtr % (uint)sizeof(Vector512))) / (uint)sizeof(T); + nuint misalignment = ((uint)sizeof(Vector512) - ((nuint)dPtr % (uint)sizeof(Vector512))) / (uint)sizeof(TInput); xPtr += misalignment; dPtr += misalignment; - Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector512)) == 0); + Debug.Assert(((nuint)dPtr % (uint)sizeof(Vector512)) == 0); remainder -= misalignment; } - Vector512 vector1; - Vector512 vector2; - Vector512 vector3; - Vector512 vector4; + Vector512 vector1; + Vector512 vector2; + Vector512 vector3; + Vector512 vector4; - if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(T))) && canAlign) + if ((remainder > (NonTemporalByteThreshold / (nuint)sizeof(TInput))) && canAlign) { // This loop stores the data non-temporally, which benefits us when there // is a large amount of data involved as it avoids polluting the cache. 
- while (remainder >= (uint)(Vector512.Count * 8)) + while (remainder >= (uint)(Vector512.Count * 8)) { // We load, process, and store the first four vectors - vector1 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512.Count * 0))); - vector2 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512.Count * 1))); - vector3 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512.Count * 2))); - vector4 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512.Count * 3))); + vector1 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512.Count * 0))); + vector2 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512.Count * 1))); + vector3 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512.Count * 2))); + vector4 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512.Count * 3))); - vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512.Count * 0)); - vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512.Count * 1)); - vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512.Count * 2)); - vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512.Count * 3)); + vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512.Count * 0)); + vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512.Count * 1)); + vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512.Count * 2)); + vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512.Count * 3)); // We load, process, and store the next four vectors - vector1 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512.Count * 4))); - vector2 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512.Count * 5))); - vector3 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512.Count * 6))); - vector4 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512.Count * 7))); + vector1 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512.Count * 4))); + vector2 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512.Count * 5))); + 
vector3 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512.Count * 6))); + vector4 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512.Count * 7))); - vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512.Count * 4)); - vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512.Count * 5)); - vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512.Count * 6)); - vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512.Count * 7)); + vector1.StoreAlignedNonTemporal(dPtr + (uint)(Vector512.Count * 4)); + vector2.StoreAlignedNonTemporal(dPtr + (uint)(Vector512.Count * 5)); + vector3.StoreAlignedNonTemporal(dPtr + (uint)(Vector512.Count * 6)); + vector4.StoreAlignedNonTemporal(dPtr + (uint)(Vector512.Count * 7)); // We adjust the source and destination references, then update // the count of remaining elements to process. - xPtr += (uint)(Vector512.Count * 8); - dPtr += (uint)(Vector512.Count * 8); + xPtr += (uint)(Vector512.Count * 8); + dPtr += (uint)(Vector512.Count * 8); - remainder -= (uint)(Vector512.Count * 8); + remainder -= (uint)(Vector512.Count * 8); } } else { - while (remainder >= (uint)(Vector512.Count * 8)) + while (remainder >= (uint)(Vector512.Count * 8)) { // We load, process, and store the first four vectors - vector1 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512.Count * 0))); - vector2 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512.Count * 1))); - vector3 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512.Count * 2))); - vector4 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512.Count * 3))); + vector1 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512.Count * 0))); + vector2 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512.Count * 1))); + vector3 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512.Count * 2))); + vector4 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512.Count * 3))); - vector1.Store(dPtr + 
(uint)(Vector512.Count * 0)); - vector2.Store(dPtr + (uint)(Vector512.Count * 1)); - vector3.Store(dPtr + (uint)(Vector512.Count * 2)); - vector4.Store(dPtr + (uint)(Vector512.Count * 3)); + vector1.Store(dPtr + (uint)(Vector512.Count * 0)); + vector2.Store(dPtr + (uint)(Vector512.Count * 1)); + vector3.Store(dPtr + (uint)(Vector512.Count * 2)); + vector4.Store(dPtr + (uint)(Vector512.Count * 3)); // We load, process, and store the next four vectors - vector1 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512.Count * 4))); - vector2 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512.Count * 5))); - vector3 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512.Count * 6))); - vector4 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512.Count * 7))); + vector1 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512.Count * 4))); + vector2 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512.Count * 5))); + vector3 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512.Count * 6))); + vector4 = TUnaryOperator.Invoke(Vector512.Load(xPtr + (uint)(Vector512.Count * 7))); - vector1.Store(dPtr + (uint)(Vector512.Count * 4)); - vector2.Store(dPtr + (uint)(Vector512.Count * 5)); - vector3.Store(dPtr + (uint)(Vector512.Count * 6)); - vector4.Store(dPtr + (uint)(Vector512.Count * 7)); + vector1.Store(dPtr + (uint)(Vector512.Count * 4)); + vector2.Store(dPtr + (uint)(Vector512.Count * 5)); + vector3.Store(dPtr + (uint)(Vector512.Count * 6)); + vector4.Store(dPtr + (uint)(Vector512.Count * 7)); // We adjust the source and destination references, then update // the count of remaining elements to process. 
- xPtr += (uint)(Vector512.Count * 8); - dPtr += (uint)(Vector512.Count * 8); + xPtr += (uint)(Vector512.Count * 8); + dPtr += (uint)(Vector512.Count * 8); - remainder -= (uint)(Vector512.Count * 8); + remainder -= (uint)(Vector512.Count * 8); } } @@ -3676,93 +3692,93 @@ static void Vectorized512(ref T xRef, ref T dRef, nuint remainder) // data before the first aligned address. nuint endIndex = remainder; - remainder = (remainder + (uint)(Vector512.Count - 1)) & (nuint)(-Vector512.Count); + remainder = (remainder + (uint)(Vector512.Count - 1)) & (nuint)(-Vector512.Count); - switch (remainder / (uint)Vector512.Count) + switch (remainder / (uint)Vector512.Count) { case 8: - { - Vector512 vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 8))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 8)); - goto case 7; - } + { + Vector512 vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 8))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 8)); + goto case 7; + } case 7: - { - Vector512 vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 7))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 7)); - goto case 6; - } + { + Vector512 vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 7))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 7)); + goto case 6; + } case 6: - { - Vector512 vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 6))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 6)); - goto case 5; - } + { + Vector512 vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 6))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 6)); + goto case 5; + } case 5: - { - Vector512 
vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 5))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 5)); - goto case 4; - } + { + Vector512 vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 5))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 5)); + goto case 4; + } case 4: - { - Vector512 vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 4))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 4)); - goto case 3; - } + { + Vector512 vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 4))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 4)); + goto case 3; + } case 3: - { - Vector512 vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 3))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 3)); - goto case 2; - } + { + Vector512 vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 3))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 3)); + goto case 2; + } case 2: - { - Vector512 vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 2))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 2)); - goto case 1; - } + { + Vector512 vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 2))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 2)); + goto case 1; + } case 1: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512.Count); - goto case 0; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + 
end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512.Count); + goto case 0; + } case 0: - { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); - break; - } + { + // Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } } } [MethodImpl(MethodImplOptions.AggressiveInlining)] - static void VectorizedSmall(ref T xRef, ref T dRef, nuint remainder) + static void VectorizedSmall(ref TInput xRef, ref TOutput dRef, nuint remainder) { - if (sizeof(T) == 4) + if (sizeof(TInput) == 4) { VectorizedSmall4(ref xRef, ref dRef, remainder); } else { - Debug.Assert(sizeof(T) == 8); + Debug.Assert(sizeof(TInput) == 8); VectorizedSmall8(ref xRef, ref dRef, remainder); } } [MethodImpl(MethodImplOptions.AggressiveInlining)] - static void VectorizedSmall4(ref T xRef, ref T dRef, nuint remainder) + static void VectorizedSmall4(ref TInput xRef, ref TOutput dRef, nuint remainder) { - Debug.Assert(sizeof(T) == 4); + Debug.Assert(sizeof(TInput) == 4); switch (remainder) { @@ -3773,145 +3789,397 @@ static void VectorizedSmall4(ref T xRef, ref T dRef, nuint remainder) case 11: case 10: case 9: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); - Vector256 end = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count)); + Vector256 beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); + Vector256 end = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count)); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); - break; - } + break; + } case 8: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + 
Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); - beg.StoreUnsafe(ref dRef); + Vector256 beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 7: case 6: case 5: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); - Vector128 end = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count)); + Vector128 beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); + Vector128 end = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count)); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); - break; - } + break; + } case 4: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); - beg.StoreUnsafe(ref dRef); + Vector128 beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 3: - { - Unsafe.Add(ref dRef, 2) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 2)); - goto case 2; - } + { + Unsafe.Add(ref dRef, 2) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 2)); + goto case 2; + } case 2: - { - Unsafe.Add(ref dRef, 1) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 1)); - goto case 1; - } + { + Unsafe.Add(ref dRef, 1) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 1)); + goto case 1; + } case 1: - { - dRef = TUnaryOperator.Invoke(xRef); - goto case 0; - } + { + dRef = TUnaryOperator.Invoke(xRef); + goto case 0; + } case 0: - { - break; - } + { + break; + } } } [MethodImpl(MethodImplOptions.AggressiveInlining)] - static void VectorizedSmall8(ref 
T xRef, ref T dRef, nuint remainder) + static void VectorizedSmall8(ref TInput xRef, ref TOutput dRef, nuint remainder) { - Debug.Assert(sizeof(T) == 8); + Debug.Assert(sizeof(TInput) == 8); switch (remainder) { case 7: case 6: case 5: + { + Debug.Assert(Vector256.IsHardwareAccelerated); + + Vector256 beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); + Vector256 end = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count)); + + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); + + break; + } + + case 4: + { + Debug.Assert(Vector256.IsHardwareAccelerated); + + Vector256 beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); + beg.StoreUnsafe(ref dRef); + + break; + } + + case 3: + { + Debug.Assert(Vector128.IsHardwareAccelerated); + + Vector128 beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); + Vector128 end = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count)); + + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); + + break; + } + + case 2: + { + Debug.Assert(Vector128.IsHardwareAccelerated); + + Vector128 beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); + beg.StoreUnsafe(ref dRef); + + break; + } + + case 1: + { + dRef = TUnaryOperator.Invoke(xRef); + goto case 0; + } + + case 0: + { + break; + } + } + } + } + + /// Performs an element-wise operation on and writes the results to . + /// The element input type. + /// The element output type. Must be the same size as TInput if TInput and TOutput both support vectorization. + /// Specifies the operation to perform on each element loaded from . + /// This should only be used when it's known that TInput/TOutput are vectorizable and the size of TInput is twice that of TOutput. 
+ private static void InvokeSpanIntoSpan_2to1( + ReadOnlySpan x, Span destination) + where TUnaryOperator : struct, IUnaryTwoToOneOperator + { + Debug.Assert(sizeof(TInput) == sizeof(TOutput) * 2); + + if (x.Length > destination.Length) + { + ThrowHelper.ThrowArgument_DestinationTooShort(); + } + + ref TInput xRef = ref MemoryMarshal.GetReference(x); + ref TOutput destinationRef = ref MemoryMarshal.GetReference(destination); + int i = 0, twoVectorsFromEnd; + + if (Vector512.IsHardwareAccelerated && TUnaryOperator.Vectorizable) + { + Debug.Assert(Vector512.IsSupported); + Debug.Assert(Vector512.IsSupported); + + twoVectorsFromEnd = x.Length - (Vector512.Count * 2); + if (i <= twoVectorsFromEnd) + { + // Loop handling two input vectors / one output vector at a time. + do { - Debug.Assert(Vector256.IsHardwareAccelerated); + TUnaryOperator.Invoke( + Vector512.LoadUnsafe(ref xRef, (uint)i), + Vector512.LoadUnsafe(ref xRef, (uint)(i + Vector512.Count))).StoreUnsafe(ref destinationRef, (uint)i); - Vector256 beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); - Vector256 end = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count)); + i += Vector512.Count * 2; + } + while (i <= twoVectorsFromEnd); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); + // Handle any remaining elements with final vectors. + if (i != x.Length) + { + i = x.Length - (Vector512.Count * 2); - break; + TUnaryOperator.Invoke( + Vector512.LoadUnsafe(ref xRef, (uint)i), + Vector512.LoadUnsafe(ref xRef, (uint)(i + Vector512.Count))).StoreUnsafe(ref destinationRef, (uint)i); } - case 4: + return; + } + } + + if (Vector256.IsHardwareAccelerated && TUnaryOperator.Vectorizable) + { + Debug.Assert(Vector256.IsSupported); + Debug.Assert(Vector256.IsSupported); + + twoVectorsFromEnd = x.Length - (Vector256.Count * 2); + if (i <= twoVectorsFromEnd) + { + // Loop handling two input vectors / one output vector at a time. 
+ do { - Debug.Assert(Vector256.IsHardwareAccelerated); + TUnaryOperator.Invoke( + Vector256.LoadUnsafe(ref xRef, (uint)i), + Vector256.LoadUnsafe(ref xRef, (uint)(i + Vector256.Count))).StoreUnsafe(ref destinationRef, (uint)i); - Vector256 beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); - beg.StoreUnsafe(ref dRef); + i += Vector256.Count * 2; + } + while (i <= twoVectorsFromEnd); + + // Handle any remaining elements with final vectors. + if (i != x.Length) + { + i = x.Length - (Vector256.Count * 2); - break; + TUnaryOperator.Invoke( + Vector256.LoadUnsafe(ref xRef, (uint)i), + Vector256.LoadUnsafe(ref xRef, (uint)(i + Vector256.Count))).StoreUnsafe(ref destinationRef, (uint)i); } - case 3: + return; + } + } + + if (Vector128.IsHardwareAccelerated && TUnaryOperator.Vectorizable) + { + Debug.Assert(Vector128.IsSupported); + Debug.Assert(Vector128.IsSupported); + + twoVectorsFromEnd = x.Length - (Vector128.Count * 2); + if (i <= twoVectorsFromEnd) + { + // Loop handling two input vectors / one output vector at a time. + do { - Debug.Assert(Vector128.IsHardwareAccelerated); + TUnaryOperator.Invoke( + Vector128.LoadUnsafe(ref xRef, (uint)i), + Vector128.LoadUnsafe(ref xRef, (uint)(i + Vector128.Count))).StoreUnsafe(ref destinationRef, (uint)i); - Vector128 beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); - Vector128 end = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count)); + i += Vector128.Count * 2; + } + while (i <= twoVectorsFromEnd); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); + // Handle any remaining elements with final vectors.
+ if (i != x.Length) + { + i = x.Length - (Vector128.Count * 2); - break; + TUnaryOperator.Invoke( + Vector128.LoadUnsafe(ref xRef, (uint)i), + Vector128.LoadUnsafe(ref xRef, (uint)(i + Vector128.Count))).StoreUnsafe(ref destinationRef, (uint)i); } - case 2: + return; + } + } + + while (i < x.Length) + { + Unsafe.Add(ref destinationRef, i) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, i)); + i++; + } + } + + /// Performs an element-wise operation on and writes the results to . + /// The element input type. + /// The element output type. Must be the same size as TInput if TInput and TOutput both support vectorization. + /// Specifies the operation to perform on each element loaded from . + /// This should only be used when it's known that TInput/TOutput are vectorizable and the size of TInput is half that of TOutput. + private static void InvokeSpanIntoSpan_1to2( + ReadOnlySpan x, Span destination) + where TUnaryOperator : struct, IUnaryOneToTwoOperator + { + Debug.Assert(sizeof(TInput) * 2 == sizeof(TOutput)); + + if (x.Length > destination.Length) + { + ThrowHelper.ThrowArgument_DestinationTooShort(); + } + + ref TInput sourceRef = ref MemoryMarshal.GetReference(x); + ref TOutput destinationRef = ref MemoryMarshal.GetReference(destination); + int i = 0, oneVectorFromEnd; + + if (Vector512.IsHardwareAccelerated && TUnaryOperator.Vectorizable) + { + Debug.Assert(Vector512.IsSupported); + Debug.Assert(Vector512.IsSupported); + + oneVectorFromEnd = x.Length - Vector512.Count; + if (i <= oneVectorFromEnd) + { + // Loop handling one input vector / two output vectors at a time. 
+ do { - Debug.Assert(Vector128.IsHardwareAccelerated); + (Vector512 lower, Vector512 upper) = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref sourceRef, (uint)i)); + lower.StoreUnsafe(ref destinationRef, (uint)i); + upper.StoreUnsafe(ref destinationRef, (uint)(i + Vector512.Count)); - Vector128 beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); - beg.StoreUnsafe(ref dRef); + i += Vector512.Count; + } + while (i <= oneVectorFromEnd); + + // Handle any remaining elements with a final input vector. + if (i != x.Length) + { + i = x.Length - Vector512.Count; - break; + (Vector512 lower, Vector512 upper) = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref sourceRef, (uint)i)); + lower.StoreUnsafe(ref destinationRef, (uint)i); + upper.StoreUnsafe(ref destinationRef, (uint)(i + Vector512.Count)); } - case 1: + return; + } + } + + if (Vector256.IsHardwareAccelerated && TUnaryOperator.Vectorizable) + { + Debug.Assert(Vector256.IsSupported); + Debug.Assert(Vector256.IsSupported); + + oneVectorFromEnd = x.Length - Vector256.Count; + if (i <= oneVectorFromEnd) + { + // Loop handling one input vector / two output vectors at a time. + do { - dRef = TUnaryOperator.Invoke(xRef); - goto case 0; + (Vector256 lower, Vector256 upper) = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref sourceRef, (uint)i)); + lower.StoreUnsafe(ref destinationRef, (uint)i); + upper.StoreUnsafe(ref destinationRef, (uint)(i + Vector256.Count)); + + i += Vector256.Count; } + while (i <= oneVectorFromEnd); - case 0: + // Handle any remaining elements with a final input vector. 
+ if (i != x.Length) + { + i = x.Length - Vector256.Count; + + (Vector256 lower, Vector256 upper) = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref sourceRef, (uint)i)); + lower.StoreUnsafe(ref destinationRef, (uint)i); + upper.StoreUnsafe(ref destinationRef, (uint)(i + Vector256.Count)); + } + + return; + } + } + + if (Vector128.IsHardwareAccelerated && TUnaryOperator.Vectorizable) + { + Debug.Assert(Vector128.IsSupported); + Debug.Assert(Vector128.IsSupported); + + oneVectorFromEnd = x.Length - Vector128.Count; + if (i <= oneVectorFromEnd) + { + // Loop handling one input vector / two output vectors at a time. + do + { + (Vector128 lower, Vector128 upper) = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref sourceRef, (uint)i)); + lower.StoreUnsafe(ref destinationRef, (uint)i); + upper.StoreUnsafe(ref destinationRef, (uint)(i + Vector128.Count)); + + i += Vector128.Count; + } + while (i <= oneVectorFromEnd); + + // Handle any remaining elements with a final input vector. + if (i != x.Length) { - break; + i = x.Length - Vector128.Count; + + (Vector128 lower, Vector128 upper) = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref sourceRef, (uint)i)); + lower.StoreUnsafe(ref destinationRef, (uint)i); + upper.StoreUnsafe(ref destinationRef, (uint)(i + Vector128.Count)); } + + return; } } + + while (i < x.Length) + { + Unsafe.Add(ref destinationRef, i) = TUnaryOperator.Invoke(Unsafe.Add(ref sourceRef, i)); + i++; + } } /// @@ -4190,74 +4458,74 @@ static void Vectorized128(ref T xRef, ref T yRef, ref T dRef, nuint remainder) switch (remainder / (uint)Vector128.Count) { case 8: - { - Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 8)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 8))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 8)); - goto case 7; - } + { + Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 8)), 
+ Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 8))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 8)); + goto case 7; + } case 7: - { - Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 7)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 7))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 7)); - goto case 6; - } + { + Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 7)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 7))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 7)); + goto case 6; + } case 6: - { - Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 6)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 6))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 6)); - goto case 5; - } + { + Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 6)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 6))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 6)); + goto case 5; + } case 5: - { - Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 5)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 5))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 5)); - goto case 4; - } + { + Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 5)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 5))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 5)); + goto case 4; + } case 4: - { - Vector128 vector = 
TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 4)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 4))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 4)); - goto case 3; - } + { + Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 4)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 4))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 4)); + goto case 3; + } case 3: - { - Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 3)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 3))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 3)); - goto case 2; - } + { + Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 3)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 3))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 3)); + goto case 2; + } case 2: - { - Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 2)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 2))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 2)); - goto case 1; - } + { + Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 2)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 2))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 2)); + goto case 1; + } case 1: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128.Count); - goto case 0; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector 
+ end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128.Count); + goto case 0; + } case 0: - { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); - break; - } + { + // Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } } } @@ -4432,74 +4700,74 @@ static void Vectorized256(ref T xRef, ref T yRef, ref T dRef, nuint remainder) switch (remainder / (uint)Vector256.Count) { case 8: - { - Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 8)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 8))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 8)); - goto case 7; - } + { + Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 8)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 8))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 8)); + goto case 7; + } case 7: - { - Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 7)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 7))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 7)); - goto case 6; - } + { + Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 7)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 7))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 7)); + goto case 6; + } case 6: - { - Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 6)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 6))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 6)); - goto case 5; - } + { + 
Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 6)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 6))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 6)); + goto case 5; + } case 5: - { - Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 5)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 5))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 5)); - goto case 4; - } + { + Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 5)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 5))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 5)); + goto case 4; + } case 4: - { - Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 4)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 4))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 4)); - goto case 3; - } + { + Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 4)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 4))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 4)); + goto case 3; + } case 3: - { - Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 3)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 3))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 3)); - goto case 2; - } + { + Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 3)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 3))); + vector.StoreUnsafe(ref dRef, remainder - 
(uint)(Vector256.Count * 3)); + goto case 2; + } case 2: - { - Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 2)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 2))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 2)); - goto case 1; - } + { + Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 2)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 2))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 2)); + goto case 1; + } case 1: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector256.Count); - goto case 0; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end.StoreUnsafe(ref dRef, endIndex - (uint)Vector256.Count); + goto case 0; + } case 0: - { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); - break; - } + { + // Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } } } @@ -4674,74 +4942,74 @@ static void Vectorized512(ref T xRef, ref T yRef, ref T dRef, nuint remainder) switch (remainder / (uint)Vector512.Count) { case 8: - { - Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 8)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 8))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 8)); - goto case 7; - } + { + Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 8)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 8))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 8)); + 
goto case 7; + } case 7: - { - Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 7)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 7))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 7)); - goto case 6; - } + { + Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 7)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 7))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 7)); + goto case 6; + } case 6: - { - Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 6)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 6))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 6)); - goto case 5; - } + { + Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 6)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 6))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 6)); + goto case 5; + } case 5: - { - Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 5)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 5))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 5)); - goto case 4; - } + { + Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 5)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 5))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 5)); + goto case 4; + } case 4: - { - Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 4)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 4))); - 
vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 4)); - goto case 3; - } + { + Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 4)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 4))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 4)); + goto case 3; + } case 3: - { - Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 3)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 3))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 3)); - goto case 2; - } + { + Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 3)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 3))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 3)); + goto case 2; + } case 2: - { - Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 2)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 2))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 2)); - goto case 1; - } + { + Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 2)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 2))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 2)); + goto case 1; + } case 1: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512.Count); - goto case 0; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512.Count); + goto case 0; + } case 0: - { - // Store the first block, which includes any elements preceding the first 
aligned block - beg.StoreUnsafe(ref dRefBeg); - break; - } + { + // Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } } } @@ -4773,83 +5041,83 @@ static void VectorizedSmall4(ref T xRef, ref T yRef, ref T dRef, nuint remainder case 11: case 10: case 9: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - Vector256.LoadUnsafe(ref yRef)); - Vector256 end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256.Count)); + Vector256 beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef)); + Vector256 end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256.Count)); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); - break; - } + break; + } case 8: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - Vector256.LoadUnsafe(ref yRef)); - beg.StoreUnsafe(ref dRef); + Vector256 beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 7: case 6: case 5: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef)); - Vector128 end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), - 
Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count)); + Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef)); + Vector128 end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count)); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); - break; - } + break; + } case 4: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef)); - beg.StoreUnsafe(ref dRef); + Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 3: - { - Unsafe.Add(ref dRef, 2) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 2), - Unsafe.Add(ref yRef, 2)); - goto case 2; - } + { + Unsafe.Add(ref dRef, 2) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 2), + Unsafe.Add(ref yRef, 2)); + goto case 2; + } case 2: - { - Unsafe.Add(ref dRef, 1) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 1), - Unsafe.Add(ref yRef, 1)); - goto case 1; - } + { + Unsafe.Add(ref dRef, 1) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 1), + Unsafe.Add(ref yRef, 1)); + goto case 1; + } case 1: - { - dRef = TBinaryOperator.Invoke(xRef, yRef); - goto case 0; - } + { + dRef = TBinaryOperator.Invoke(xRef, yRef); + goto case 0; + } case 0: - { - break; - } + { + break; + } } } @@ -4863,67 +5131,67 @@ static void VectorizedSmall8(ref T xRef, ref T yRef, ref T dRef, nuint remainder case 7: case 6: case 5: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref 
xRef), - Vector256.LoadUnsafe(ref yRef)); - Vector256 end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256.Count)); + Vector256 beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef)); + Vector256 end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256.Count)); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); - break; - } + break; + } case 4: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - Vector256.LoadUnsafe(ref yRef)); - beg.StoreUnsafe(ref dRef); + Vector256 beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 3: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef)); - Vector128 end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count)); + Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef)); + Vector128 end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count)); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - 
(uint)Vector128.Count); - break; - } + break; + } case 2: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef)); - beg.StoreUnsafe(ref dRef); + Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 1: - { - dRef = TBinaryOperator.Invoke(xRef, yRef); - goto case 0; - } + { + dRef = TBinaryOperator.Invoke(xRef, yRef); + goto case 0; + } case 0: - { - break; - } + { + break; + } } } } @@ -4968,7 +5236,7 @@ private static void InvokeSpanScalarIntoSpan( /// private static void InvokeSpanScalarIntoSpan( ReadOnlySpan x, T y, Span destination) - where TTransformOperator : struct, IUnaryOperator + where TTransformOperator : struct, IUnaryOperator where TBinaryOperator : struct, IBinaryOperator { if (x.Length > destination.Length) @@ -5224,74 +5492,74 @@ static void Vectorized128(ref T xRef, T y, ref T dRef, nuint remainder) switch (remainder / (uint)Vector128.Count) { case 8: - { - Vector128 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 8))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 8)); - goto case 7; - } + { + Vector128 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 8))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 8)); + goto case 7; + } case 7: - { - Vector128 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 7))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 7)); - goto case 6; - } + { + Vector128 vector = 
TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 7))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 7)); + goto case 6; + } case 6: - { - Vector128 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 6))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 6)); - goto case 5; - } + { + Vector128 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 6))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 6)); + goto case 5; + } case 5: - { - Vector128 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 5))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 5)); - goto case 4; - } + { + Vector128 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 5))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 5)); + goto case 4; + } case 4: - { - Vector128 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 4))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 4)); - goto case 3; - } + { + Vector128 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 4))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 4)); + goto case 3; + } case 3: - { - Vector128 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 3))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 3)); - goto case 2; - } 
+ { + Vector128 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 3))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 3)); + goto case 2; + } case 2: - { - Vector128 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 2))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 2)); - goto case 1; - } + { + Vector128 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 2))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 2)); + goto case 1; + } case 1: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128.Count); - goto case 0; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128.Count); + goto case 0; + } case 0: - { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); - break; - } + { + // Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } } } @@ -5462,74 +5730,74 @@ static void Vectorized256(ref T xRef, T y, ref T dRef, nuint remainder) switch (remainder / (uint)Vector256.Count) { case 8: - { - Vector256 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 8))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 8)); - goto case 7; - } + { + Vector256 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 8))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - 
(uint)(Vector256.Count * 8)); + goto case 7; + } case 7: - { - Vector256 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 7))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 7)); - goto case 6; - } + { + Vector256 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 7))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 7)); + goto case 6; + } case 6: - { - Vector256 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 6))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 6)); - goto case 5; - } + { + Vector256 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 6))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 6)); + goto case 5; + } case 5: - { - Vector256 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 5))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 5)); - goto case 4; - } + { + Vector256 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 5))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 5)); + goto case 4; + } case 4: - { - Vector256 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 4))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 4)); - goto case 3; - } + { + Vector256 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 4))), + yVec); + 
vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 4)); + goto case 3; + } case 3: - { - Vector256 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 3))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 3)); - goto case 2; - } + { + Vector256 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 3))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 3)); + goto case 2; + } case 2: - { - Vector256 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 2))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 2)); - goto case 1; - } + { + Vector256 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 2))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 2)); + goto case 1; + } case 1: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector256.Count); - goto case 0; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end.StoreUnsafe(ref dRef, endIndex - (uint)Vector256.Count); + goto case 0; + } case 0: - { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); - break; - } + { + // Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } } } @@ -5700,74 +5968,74 @@ static void Vectorized512(ref T xRef, T y, ref T dRef, nuint remainder) switch (remainder / (uint)Vector512.Count) { case 8: - { - Vector512 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref 
xRef, remainder - (uint)(Vector512.Count * 8))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 8)); - goto case 7; - } + { + Vector512 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 8))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 8)); + goto case 7; + } case 7: - { - Vector512 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 7))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 7)); - goto case 6; - } + { + Vector512 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 7))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 7)); + goto case 6; + } case 6: - { - Vector512 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 6))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 6)); - goto case 5; - } + { + Vector512 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 6))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 6)); + goto case 5; + } case 5: - { - Vector512 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 5))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 5)); - goto case 4; - } + { + Vector512 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 5))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 5)); + goto case 4; + } case 4: - { - Vector512 vector = 
TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 4))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 4)); - goto case 3; - } + { + Vector512 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 4))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 4)); + goto case 3; + } case 3: - { - Vector512 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 3))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 3)); - goto case 2; - } + { + Vector512 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 3))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 3)); + goto case 2; + } case 2: - { - Vector512 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 2))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 2)); - goto case 1; - } + { + Vector512 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 2))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 2)); + goto case 1; + } case 1: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512.Count); - goto case 0; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512.Count); + goto case 0; + } case 0: - { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); - break; - } + { + // 
Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } } } @@ -5799,87 +6067,87 @@ static void VectorizedSmall4(ref T xRef, T y, ref T dRef, nuint remainder) case 11: case 10: case 9: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 yVec = Vector256.Create(y); + Vector256 yVec = Vector256.Create(y); - Vector256 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)), - yVec); - Vector256 end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count)), - yVec); + Vector256 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)), + yVec); + Vector256 end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count)), + yVec); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); - break; - } + break; + } case 8: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)), - Vector256.Create(y)); - beg.StoreUnsafe(ref dRef); + Vector256 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)), + Vector256.Create(y)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 7: case 6: case 5: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 yVec = Vector128.Create(y); + Vector128 yVec = Vector128.Create(y); - Vector128 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)), - yVec); - Vector128 end = 
TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count)), - yVec); + Vector128 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)), + yVec); + Vector128 end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count)), + yVec); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); - break; - } + break; + } case 4: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)), - Vector128.Create(y)); - beg.StoreUnsafe(ref dRef); + Vector128 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)), + Vector128.Create(y)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 3: - { - Unsafe.Add(ref dRef, 2) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 2)), - y); - goto case 2; - } + { + Unsafe.Add(ref dRef, 2) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 2)), + y); + goto case 2; + } case 2: - { - Unsafe.Add(ref dRef, 1) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 1)), - y); - goto case 1; - } + { + Unsafe.Add(ref dRef, 1) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 1)), + y); + goto case 1; + } case 1: - { - dRef = TBinaryOperator.Invoke(TTransformOperator.Invoke(xRef), y); - goto case 0; - } + { + dRef = TBinaryOperator.Invoke(TTransformOperator.Invoke(xRef), y); + goto case 0; + } case 0: - { - break; - } + { + break; + } } } @@ -5893,71 +6161,71 @@ static void VectorizedSmall8(ref T xRef, T y, ref T dRef, nuint remainder) case 7: case 6: case 5: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { 
+ Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 yVec = Vector256.Create(y); + Vector256 yVec = Vector256.Create(y); - Vector256 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)), - yVec); - Vector256 end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count)), - yVec); + Vector256 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)), + yVec); + Vector256 end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count)), + yVec); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); - break; - } + break; + } case 4: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)), - Vector256.Create(y)); - beg.StoreUnsafe(ref dRef); + Vector256 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)), + Vector256.Create(y)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 3: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 yVec = Vector128.Create(y); + Vector128 yVec = Vector128.Create(y); - Vector128 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)), - yVec); - Vector128 end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count)), - yVec); + Vector128 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)), + yVec); + Vector128 end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count)), + yVec); - 
beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); - break; - } + break; + } case 2: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)), - Vector128.Create(y)); - beg.StoreUnsafe(ref dRef); + Vector128 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)), + Vector128.Create(y)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 1: - { - dRef = TBinaryOperator.Invoke(TTransformOperator.Invoke(xRef), y); - goto case 0; - } + { + dRef = TBinaryOperator.Invoke(TTransformOperator.Invoke(xRef), y); + goto case 0; + } case 0: - { - break; - } + { + break; + } } } } @@ -6266,81 +6534,81 @@ static void Vectorized128(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nuint switch (remainder / (uint)Vector128.Count) { case 8: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 8)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 8)), - Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 8))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 8)); - goto case 7; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 8)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 8)), + Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 8))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 8)); + goto case 7; + } case 7: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 7)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 7)), - 
Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 7))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 7)); - goto case 6; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 7)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 7)), + Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 7))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 7)); + goto case 6; + } case 6: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 6)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 6)), - Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 6))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 6)); - goto case 5; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 6)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 6)), + Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 6))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 6)); + goto case 5; + } case 5: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 5)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 5)), - Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 5))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 5)); - goto case 4; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 5)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 5)), + Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 5))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 5)); + goto case 
4; + } case 4: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 4)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 4)), - Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 4))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 4)); - goto case 3; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 4)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 4)), + Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 4))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 4)); + goto case 3; + } case 3: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 3)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 3)), - Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 3))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 3)); - goto case 2; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 3)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 3)), + Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 3))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 3)); + goto case 2; + } case 2: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 2)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 2)), - Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 2))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 2)); - goto case 1; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 2)), + 
Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 2)), + Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 2))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 2)); + goto case 1; + } case 1: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128.Count); - goto case 0; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128.Count); + goto case 0; + } case 0: - { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); - break; - } + { + // Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } } } @@ -6539,81 +6807,81 @@ static void Vectorized256(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nuint switch (remainder / (uint)Vector256.Count) { case 8: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 8)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 8)), - Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 8))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 8)); - goto case 7; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 8)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 8)), + Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 8))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 8)); + goto case 7; + } case 7: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 7)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 7)), - 
Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 7))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 7)); - goto case 6; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 7)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 7)), + Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 7))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 7)); + goto case 6; + } case 6: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 6)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 6)), - Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 6))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 6)); - goto case 5; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 6)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 6)), + Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 6))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 6)); + goto case 5; + } case 5: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 5)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 5)), - Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 5))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 5)); - goto case 4; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 5)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 5)), + Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 5))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 5)); + goto case 
4; + } case 4: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 4)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 4)), - Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 4))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 4)); - goto case 3; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 4)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 4)), + Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 4))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 4)); + goto case 3; + } case 3: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 3)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 3)), - Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 3))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 3)); - goto case 2; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 3)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 3)), + Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 3))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 3)); + goto case 2; + } case 2: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 2)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 2)), - Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 2))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 2)); - goto case 1; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 2)), + 
Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 2)), + Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 2))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 2)); + goto case 1; + } case 1: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector256.Count); - goto case 0; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end.StoreUnsafe(ref dRef, endIndex - (uint)Vector256.Count); + goto case 0; + } case 0: - { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); - break; - } + { + // Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } } } @@ -6812,81 +7080,81 @@ static void Vectorized512(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nuint switch (remainder / (uint)Vector512.Count) { case 8: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 8)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 8)), - Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 8))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 8)); - goto case 7; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 8)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 8)), + Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 8))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 8)); + goto case 7; + } case 7: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 7)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 7)), - 
Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 7))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 7)); - goto case 6; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 7)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 7)), + Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 7))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 7)); + goto case 6; + } case 6: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 6)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 6)), - Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 6))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 6)); - goto case 5; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 6)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 6)), + Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 6))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 6)); + goto case 5; + } case 5: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 5)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 5)), - Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 5))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 5)); - goto case 4; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 5)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 5)), + Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 5))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 5)); + goto case 
4; + } case 4: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 4)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 4)), - Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 4))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 4)); - goto case 3; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 4)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 4)), + Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 4))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 4)); + goto case 3; + } case 3: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 3)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 3)), - Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 3))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 3)); - goto case 2; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 3)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 3)), + Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 3))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 3)); + goto case 2; + } case 2: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 2)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 2)), - Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 2))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 2)); - goto case 1; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 2)), + 
Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 2)), + Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 2))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 2)); + goto case 1; + } case 1: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512.Count); - goto case 0; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512.Count); + goto case 0; + } case 0: - { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); - break; - } + { + // Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } } } @@ -6918,91 +7186,91 @@ static void VectorizedSmall4(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nui case 11: case 10: case 9: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - Vector256.LoadUnsafe(ref yRef), - Vector256.LoadUnsafe(ref zRef)); - Vector256 end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256.Count), - Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256.Count)); + Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef), + Vector256.LoadUnsafe(ref zRef)); + Vector256 end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256.Count), + Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256.Count)); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); 
+ beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); - break; - } + break; + } case 8: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - Vector256.LoadUnsafe(ref yRef), - Vector256.LoadUnsafe(ref zRef)); - beg.StoreUnsafe(ref dRef); + Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef), + Vector256.LoadUnsafe(ref zRef)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 7: case 6: case 5: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef), - Vector128.LoadUnsafe(ref zRef)); - Vector128 end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count), - Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128.Count)); + Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef), + Vector128.LoadUnsafe(ref zRef)); + Vector128 end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count), + Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128.Count)); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); - break; - } + break; + } case 4: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef), - Vector128.LoadUnsafe(ref zRef)); - 
beg.StoreUnsafe(ref dRef); + Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef), + Vector128.LoadUnsafe(ref zRef)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 3: - { - Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2), - Unsafe.Add(ref yRef, 2), - Unsafe.Add(ref zRef, 2)); - goto case 2; - } + { + Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2), + Unsafe.Add(ref yRef, 2), + Unsafe.Add(ref zRef, 2)); + goto case 2; + } case 2: - { - Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1), - Unsafe.Add(ref yRef, 1), - Unsafe.Add(ref zRef, 1)); - goto case 1; - } + { + Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1), + Unsafe.Add(ref yRef, 1), + Unsafe.Add(ref zRef, 1)); + goto case 1; + } case 1: - { - dRef = TTernaryOperator.Invoke(xRef, yRef, zRef); - goto case 0; - } + { + dRef = TTernaryOperator.Invoke(xRef, yRef, zRef); + goto case 0; + } case 0: - { - break; - } + { + break; + } } } @@ -7016,73 +7284,73 @@ static void VectorizedSmall8(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nui case 7: case 6: case 5: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - Vector256.LoadUnsafe(ref yRef), - Vector256.LoadUnsafe(ref zRef)); - Vector256 end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256.Count), - Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256.Count)); + Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef), + Vector256.LoadUnsafe(ref zRef)); + Vector256 end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), + Vector256.LoadUnsafe(ref yRef, remainder - 
(uint)Vector256.Count), + Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256.Count)); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); - break; - } + break; + } case 4: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - Vector256.LoadUnsafe(ref yRef), - Vector256.LoadUnsafe(ref zRef)); - beg.StoreUnsafe(ref dRef); + Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef), + Vector256.LoadUnsafe(ref zRef)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 3: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef), - Vector128.LoadUnsafe(ref zRef)); - Vector128 end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count), - Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128.Count)); + Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef), + Vector128.LoadUnsafe(ref zRef)); + Vector128 end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count), + Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128.Count)); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); - break; - } + break; + } case 2: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + 
Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef), - Vector128.LoadUnsafe(ref zRef)); - beg.StoreUnsafe(ref dRef); + Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef), + Vector128.LoadUnsafe(ref zRef)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 1: - { - dRef = TTernaryOperator.Invoke(xRef, yRef, zRef); - goto case 0; - } + { + dRef = TTernaryOperator.Invoke(xRef, yRef, zRef); + goto case 0; + } case 0: - { - break; - } + { + break; + } } } } @@ -7385,81 +7653,81 @@ static void Vectorized128(ref T xRef, ref T yRef, T z, ref T dRef, nuint remaind switch (remainder / (uint)Vector128.Count) { case 8: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 8)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 8)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 8)); - goto case 7; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 8)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 8)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 8)); + goto case 7; + } case 7: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 7)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 7)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 7)); - goto case 6; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 7)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 7)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 7)); + goto case 6; + } case 6: - { - 
Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 6)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 6)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 6)); - goto case 5; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 6)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 6)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 6)); + goto case 5; + } case 5: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 5)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 5)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 5)); - goto case 4; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 5)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 5)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 5)); + goto case 4; + } case 4: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 4)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 4)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 4)); - goto case 3; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 4)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 4)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 4)); + goto case 3; + } - case 3: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 3)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 
3)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 3)); - goto case 2; - } + case 3: + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 3)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 3)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 3)); + goto case 2; + } case 2: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 2)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 2)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 2)); - goto case 1; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 2)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 2)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 2)); + goto case 1; + } case 1: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128.Count); - goto case 0; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128.Count); + goto case 0; + } case 0: - { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); - break; - } + { + // Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } } } @@ -7654,81 +7922,81 @@ static void Vectorized256(ref T xRef, ref T yRef, T z, ref T dRef, nuint remaind switch (remainder / (uint)Vector256.Count) { case 8: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 8)), - Vector256.LoadUnsafe(ref yRef, remainder - 
(uint)(Vector256.Count * 8)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 8)); - goto case 7; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 8)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 8)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 8)); + goto case 7; + } case 7: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 7)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 7)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 7)); - goto case 6; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 7)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 7)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 7)); + goto case 6; + } case 6: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 6)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 6)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 6)); - goto case 5; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 6)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 6)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 6)); + goto case 5; + } case 5: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 5)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 5)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 5)); - goto case 4; - } + { + Vector256 vector = 
TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 5)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 5)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 5)); + goto case 4; + } case 4: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 4)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 4)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 4)); - goto case 3; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 4)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 4)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 4)); + goto case 3; + } case 3: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 3)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 3)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 3)); - goto case 2; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 3)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 3)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 3)); + goto case 2; + } case 2: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 2)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 2)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 2)); - goto case 1; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 2)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 2)), + zVec); + 
vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 2)); + goto case 1; + } case 1: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector256.Count); - goto case 0; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end.StoreUnsafe(ref dRef, endIndex - (uint)Vector256.Count); + goto case 0; + } case 0: - { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); - break; - } + { + // Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } } } @@ -7923,81 +8191,81 @@ static void Vectorized512(ref T xRef, ref T yRef, T z, ref T dRef, nuint remaind switch (remainder / (uint)Vector512.Count) { case 8: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 8)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 8)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 8)); - goto case 7; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 8)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 8)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 8)); + goto case 7; + } case 7: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 7)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 7)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 7)); - goto case 6; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 7)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 7)), + 
zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 7)); + goto case 6; + } case 6: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 6)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 6)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 6)); - goto case 5; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 6)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 6)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 6)); + goto case 5; + } case 5: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 5)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 5)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 5)); - goto case 4; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 5)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 5)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 5)); + goto case 4; + } case 4: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 4)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 4)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 4)); - goto case 3; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 4)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 4)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 4)); + goto case 3; + } case 3: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, 
remainder - (uint)(Vector512.Count * 3)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 3)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 3)); - goto case 2; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 3)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 3)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 3)); + goto case 2; + } case 2: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 2)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 2)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 2)); - goto case 1; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 2)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 2)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 2)); + goto case 1; + } case 1: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512.Count); - goto case 0; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512.Count); + goto case 0; + } case 0: - { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); - break; - } + { + // Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } } } @@ -8029,95 +8297,95 @@ static void VectorizedSmall4(ref T xRef, ref T yRef, T z, ref T dRef, nuint rema case 11: case 10: case 9: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + 
Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 zVec = Vector256.Create(z); + Vector256 zVec = Vector256.Create(z); - Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - Vector256.LoadUnsafe(ref yRef), - zVec); - Vector256 end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256.Count), - zVec); + Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef), + zVec); + Vector256 end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256.Count), + zVec); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); - break; - } + break; + } case 8: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - Vector256.LoadUnsafe(ref yRef), - Vector256.Create(z)); - beg.StoreUnsafe(ref dRef); + Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef), + Vector256.Create(z)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 7: case 6: case 5: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 zVec = Vector128.Create(z); + Vector128 zVec = Vector128.Create(z); - Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef), - zVec); - Vector128 end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count), - zVec); + Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + 
Vector128.LoadUnsafe(ref yRef), + zVec); + Vector128 end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count), + zVec); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); - break; - } + break; + } case 4: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef), - Vector128.Create(z)); - beg.StoreUnsafe(ref dRef); + Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef), + Vector128.Create(z)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 3: - { - Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2), - Unsafe.Add(ref yRef, 2), - z); - goto case 2; - } + { + Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2), + Unsafe.Add(ref yRef, 2), + z); + goto case 2; + } case 2: - { - Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1), - Unsafe.Add(ref yRef, 1), - z); - goto case 1; - } + { + Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1), + Unsafe.Add(ref yRef, 1), + z); + goto case 1; + } case 1: - { - dRef = TTernaryOperator.Invoke(xRef, yRef, z); - goto case 0; - } + { + dRef = TTernaryOperator.Invoke(xRef, yRef, z); + goto case 0; + } case 0: - { - break; - } + { + break; + } } } @@ -8131,77 +8399,77 @@ static void VectorizedSmall8(ref T xRef, ref T yRef, T z, ref T dRef, nuint rema case 7: case 6: case 5: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 zVec = Vector256.Create(z); + Vector256 zVec = Vector256.Create(z); - Vector256 beg = 
TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - Vector256.LoadUnsafe(ref yRef), - zVec); - Vector256 end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256.Count), - zVec); + Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef), + zVec); + Vector256 end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256.Count), + zVec); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); - break; - } + break; + } case 4: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - Vector256.LoadUnsafe(ref yRef), - Vector256.Create(z)); - beg.StoreUnsafe(ref dRef); + Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef), + Vector256.Create(z)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 3: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 zVec = Vector128.Create(z); + Vector128 zVec = Vector128.Create(z); - Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef), - zVec); - Vector128 end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count), - zVec); + Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef), + zVec); + Vector128 end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), + 
Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count), + zVec); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); - break; - } + break; + } case 2: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef), - Vector128.Create(z)); - beg.StoreUnsafe(ref dRef); + Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef), + Vector128.Create(z)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 1: - { - dRef = TTernaryOperator.Invoke(xRef, yRef, z); - goto case 0; - } + { + dRef = TTernaryOperator.Invoke(xRef, yRef, z); + goto case 0; + } case 0: - { - break; - } + { + break; + } } } } @@ -8504,81 +8772,81 @@ static void Vectorized128(ref T xRef, T y, ref T zRef, ref T dRef, nuint remaind switch (remainder / (uint)Vector128.Count) { case 8: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 8)), - yVec, - Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 8))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 8)); - goto case 7; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 8)), + yVec, + Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 8))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 8)); + goto case 7; + } case 7: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 7)), - yVec, - Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 7))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 7)); - goto 
case 6; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 7)), + yVec, + Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 7))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 7)); + goto case 6; + } case 6: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 6)), - yVec, - Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 6))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 6)); - goto case 5; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 6)), + yVec, + Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 6))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 6)); + goto case 5; + } case 5: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 5)), - yVec, - Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 5))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 5)); - goto case 4; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 5)), + yVec, + Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 5))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 5)); + goto case 4; + } case 4: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 4)), - yVec, - Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 4))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 4)); - goto case 3; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 4)), + yVec, + Vector128.LoadUnsafe(ref zRef, remainder - 
(uint)(Vector128.Count * 4))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 4)); + goto case 3; + } case 3: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 3)), - yVec, - Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 3))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 3)); - goto case 2; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 3)), + yVec, + Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 3))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 3)); + goto case 2; + } case 2: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 2)), - yVec, - Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 2))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 2)); - goto case 1; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 2)), + yVec, + Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 2))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 2)); + goto case 1; + } case 1: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128.Count); - goto case 0; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128.Count); + goto case 0; + } case 0: - { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); - break; - } + { + // Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } } } @@ -8773,81 +9041,81 
@@ static void Vectorized256(ref T xRef, T y, ref T zRef, ref T dRef, nuint remaind switch (remainder / (uint)Vector256.Count) { case 8: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 8)), - yVec, - Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 8))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 8)); - goto case 7; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 8)), + yVec, + Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 8))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 8)); + goto case 7; + } case 7: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 7)), - yVec, - Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 7))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 7)); - goto case 6; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 7)), + yVec, + Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 7))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 7)); + goto case 6; + } case 6: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 6)), - yVec, - Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 6))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 6)); - goto case 5; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 6)), + yVec, + Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 6))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 6)); + goto case 5; + } case 5: - { - Vector256 vector = 
TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 5)), - yVec, - Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 5))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 5)); - goto case 4; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 5)), + yVec, + Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 5))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 5)); + goto case 4; + } case 4: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 4)), - yVec, - Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 4))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 4)); - goto case 3; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 4)), + yVec, + Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 4))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 4)); + goto case 3; + } case 3: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 3)), - yVec, - Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 3))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 3)); - goto case 2; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 3)), + yVec, + Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 3))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 3)); + goto case 2; + } case 2: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 2)), - yVec, - Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 2))); - 
vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 2)); - goto case 1; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 2)), + yVec, + Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 2))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 2)); + goto case 1; + } case 1: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector256.Count); - goto case 0; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end.StoreUnsafe(ref dRef, endIndex - (uint)Vector256.Count); + goto case 0; + } case 0: - { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); - break; - } + { + // Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } } } @@ -9042,81 +9310,81 @@ static void Vectorized512(ref T xRef, T y, ref T zRef, ref T dRef, nuint remaind switch (remainder / (uint)Vector512.Count) { case 8: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 8)), - yVec, - Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 8))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 8)); - goto case 7; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 8)), + yVec, + Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 8))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 8)); + goto case 7; + } case 7: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 7)), - yVec, - Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 
7))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 7)); - goto case 6; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 7)), + yVec, + Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 7))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 7)); + goto case 6; + } case 6: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 6)), - yVec, - Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 6))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 6)); - goto case 5; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 6)), + yVec, + Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 6))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 6)); + goto case 5; + } case 5: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 5)), - yVec, - Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 5))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 5)); - goto case 4; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 5)), + yVec, + Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 5))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 5)); + goto case 4; + } case 4: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 4)), - yVec, - Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 4))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 4)); - goto case 3; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder 
- (uint)(Vector512.Count * 4)), + yVec, + Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 4))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 4)); + goto case 3; + } case 3: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 3)), - yVec, - Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 3))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 3)); - goto case 2; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 3)), + yVec, + Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 3))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 3)); + goto case 2; + } case 2: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 2)), - yVec, - Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 2))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 2)); - goto case 1; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 2)), + yVec, + Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 2))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 2)); + goto case 1; + } case 1: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512.Count); - goto case 0; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512.Count); + goto case 0; + } case 0: - { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); - break; - } + { + // Store the first block, which includes any elements preceding the first 
aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } } } @@ -9148,95 +9416,95 @@ static void VectorizedSmall4(ref T xRef, T y, ref T zRef, ref T dRef, nuint rema case 11: case 10: case 9: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 yVec = Vector256.Create(y); + Vector256 yVec = Vector256.Create(y); - Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - yVec, - Vector256.LoadUnsafe(ref zRef)); - Vector256 end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), - yVec, - Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256.Count)); + Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + yVec, + Vector256.LoadUnsafe(ref zRef)); + Vector256 end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), + yVec, + Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256.Count)); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); - break; - } + break; + } case 8: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - Vector256.Create(y), - Vector256.LoadUnsafe(ref zRef)); - beg.StoreUnsafe(ref dRef); + Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.Create(y), + Vector256.LoadUnsafe(ref zRef)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 7: case 6: case 5: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 yVec = Vector128.Create(y); + Vector128 yVec = Vector128.Create(y); - Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - yVec, - Vector128.LoadUnsafe(ref zRef)); - 
Vector128 end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), - yVec, - Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128.Count)); + Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + yVec, + Vector128.LoadUnsafe(ref zRef)); + Vector128 end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), + yVec, + Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128.Count)); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); - break; - } + break; + } case 4: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.Create(y), - Vector128.LoadUnsafe(ref zRef)); - beg.StoreUnsafe(ref dRef); + Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.Create(y), + Vector128.LoadUnsafe(ref zRef)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 3: - { - Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2), - y, - Unsafe.Add(ref zRef, 2)); - goto case 2; - } + { + Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2), + y, + Unsafe.Add(ref zRef, 2)); + goto case 2; + } case 2: - { - Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1), - y, - Unsafe.Add(ref zRef, 1)); - goto case 1; - } + { + Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1), + y, + Unsafe.Add(ref zRef, 1)); + goto case 1; + } case 1: - { - dRef = TTernaryOperator.Invoke(xRef, y, zRef); - goto case 0; - } + { + dRef = TTernaryOperator.Invoke(xRef, y, zRef); + goto case 0; + } case 0: - { - break; - } + { + break; + } } } @@ -9250,77 +9518,77 @@ static void VectorizedSmall8(ref T xRef, T y, ref T zRef, ref T 
dRef, nuint rema case 7: case 6: case 5: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 yVec = Vector256.Create(y); + Vector256 yVec = Vector256.Create(y); - Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - yVec, - Vector256.LoadUnsafe(ref zRef)); - Vector256 end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), - yVec, - Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256.Count)); + Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + yVec, + Vector256.LoadUnsafe(ref zRef)); + Vector256 end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), + yVec, + Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256.Count)); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); - break; - } + break; + } case 4: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - Vector256.Create(y), - Vector256.LoadUnsafe(ref zRef)); - beg.StoreUnsafe(ref dRef); + Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.Create(y), + Vector256.LoadUnsafe(ref zRef)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 3: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 yVec = Vector128.Create(y); + Vector128 yVec = Vector128.Create(y); - Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - yVec, - Vector128.LoadUnsafe(ref zRef)); - Vector128 end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), - yVec, - Vector128.LoadUnsafe(ref zRef, remainder - 
(uint)Vector128.Count)); + Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + yVec, + Vector128.LoadUnsafe(ref zRef)); + Vector128 end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), + yVec, + Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128.Count)); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); - break; - } + break; + } case 2: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.Create(y), - Vector128.LoadUnsafe(ref zRef)); - beg.StoreUnsafe(ref dRef); + Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.Create(y), + Vector128.LoadUnsafe(ref zRef)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 1: - { - dRef = TTernaryOperator.Invoke(xRef, y, zRef); - goto case 0; - } + { + dRef = TTernaryOperator.Invoke(xRef, y, zRef); + goto case 0; + } case 0: - { - break; - } + { + break; + } } } } @@ -9740,6 +10008,32 @@ ref Unsafe.As(ref MemoryMarshal.GetReference(RemainderUInt64Mask_8x9)) throw new NotSupportedException(); } + /// Creates a span of from a when they're the same type. + private static unsafe ReadOnlySpan Rename(ReadOnlySpan span) + { + Debug.Assert(sizeof(TFrom) == sizeof(TTo)); + return MemoryMarshal.CreateReadOnlySpan(ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length); + } + + /// Creates a span of from a when they're the same type. + private static unsafe Span Rename(Span span) + { + Debug.Assert(sizeof(TFrom) == sizeof(TTo)); + return MemoryMarshal.CreateSpan(ref Unsafe.As(ref MemoryMarshal.GetReference(span)), span.Length); + } + + /// Gets whether is or if in a 32-bit process. 
+ private static bool IsUInt32Like() => typeof(T) == typeof(uint) || (IntPtr.Size == 4 && typeof(T) == typeof(nuint)); + + /// Gets whether is or if in a 32-bit process. + private static bool IsInt32Like() => typeof(T) == typeof(int) || (IntPtr.Size == 4 && typeof(T) == typeof(nint)); + + /// Gets whether is or if in a 64-bit process. + private static bool IsUInt64Like() => typeof(T) == typeof(ulong) || (IntPtr.Size == 8 && typeof(T) == typeof(nuint)); + + /// Gets whether is or if in a 64-bit process. + private static bool IsInt64Like() => typeof(T) == typeof(long) || (IntPtr.Size == 8 && typeof(T) == typeof(nint)); + /// x + y internal readonly struct AddOperator : IAggregationOperator where T : IAdditionOperators, IAdditiveIdentity { @@ -9846,7 +10140,7 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) // Ieee754Remainder - internal readonly struct ReciprocalOperator : IUnaryOperator where T : IFloatingPoint + internal readonly struct ReciprocalOperator : IUnaryOperator where T : IFloatingPoint { public static bool Vectorizable => true; public static T Invoke(T x) => T.One / x; @@ -9855,7 +10149,7 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) public static Vector512 Invoke(Vector512 x) => Vector512.One / x; } - private readonly struct ReciprocalSqrtOperator : IUnaryOperator where T : IFloatingPointIeee754 + private readonly struct ReciprocalSqrtOperator : IUnaryOperator where T : IFloatingPointIeee754 { public static bool Vectorizable => true; public static T Invoke(T x) => T.One / T.Sqrt(x); @@ -9864,7 +10158,7 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) public static Vector512 Invoke(Vector512 x) => Vector512.One / Vector512.Sqrt(x); } - private readonly struct ReciprocalEstimateOperator : IUnaryOperator where T : IFloatingPointIeee754 + private readonly struct ReciprocalEstimateOperator : IUnaryOperator where T : IFloatingPointIeee754 { public static bool Vectorizable => true; @@ -9912,7 +10206,7 @@ public static 
Vector512 Invoke(Vector512 x) } } - private readonly struct ReciprocalSqrtEstimateOperator : IUnaryOperator where T : IFloatingPointIeee754 + private readonly struct ReciprocalSqrtEstimateOperator : IUnaryOperator where T : IFloatingPointIeee754 { public static bool Vectorizable => true; @@ -9991,7 +10285,7 @@ public static Vector512 Invoke(Vector512 x) } /// ~x - internal readonly struct OnesComplementOperator : IUnaryOperator where T : IBitwiseOperators + internal readonly struct OnesComplementOperator : IUnaryOperator where T : IBitwiseOperators { public static bool Vectorizable => true; public static T Invoke(T x) => ~x; @@ -11221,7 +11515,7 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) } /// -x - internal readonly struct NegateOperator : IUnaryOperator where T : IUnaryNegationOperators + internal readonly struct NegateOperator : IUnaryOperator where T : IUnaryNegationOperators { public static bool Vectorizable => true; public static T Invoke(T x) => -x; @@ -11267,7 +11561,7 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) } /// x - internal readonly struct IdentityOperator : IUnaryOperator + internal readonly struct IdentityOperator : IUnaryOperator { public static bool Vectorizable => true; public static T Invoke(T x) => x; @@ -11277,7 +11571,7 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) } /// x * x - internal readonly struct SquaredOperator : IUnaryOperator where T : IMultiplyOperators + internal readonly struct SquaredOperator : IUnaryOperator where T : IMultiplyOperators { public static bool Vectorizable => true; public static T Invoke(T x) => x * x; @@ -11287,7 +11581,7 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) } /// T.Abs(x) - internal readonly struct AbsoluteOperator : IUnaryOperator where T : INumberBase + internal readonly struct AbsoluteOperator : IUnaryOperator where T : INumberBase { public static bool Vectorizable => true; @@ -11367,7 +11661,7 @@ public static Vector512 Invoke(Vector512 x) 
} /// T.Exp(x) - internal readonly struct ExpOperator : IUnaryOperator + internal readonly struct ExpOperator : IUnaryOperator where T : IExponentialFunctions { public static bool Vectorizable => (typeof(T) == typeof(double)) @@ -11453,7 +11747,7 @@ public static Vector512 Invoke(Vector512 x) #if !NET9_0_OR_GREATER /// double.Exp(x) - internal readonly struct ExpOperatorDouble : IUnaryOperator + internal readonly struct ExpOperatorDouble : IUnaryOperator { // This code is based on `vrd2_exp` from amd/aocl-libm-ose // Copyright (C) 2019-2020 Advanced Micro Devices, Inc. All rights reserved. @@ -11672,7 +11966,7 @@ public static Vector512 Invoke(Vector512 x) } /// float.Exp(x) - internal readonly struct ExpOperatorSingle : IUnaryOperator + internal readonly struct ExpOperatorSingle : IUnaryOperator { // This code is based on `vrs4_expf` from amd/aocl-libm-ose // Copyright (C) 2019-2022 Advanced Micro Devices, Inc. All rights reserved. @@ -11953,7 +12247,7 @@ public static Vector512 Invoke(Vector512 x) #endif /// T.ExpM1(x) - internal readonly struct ExpM1Operator : IUnaryOperator + internal readonly struct ExpM1Operator : IUnaryOperator where T : IExponentialFunctions { public static bool Vectorizable => ExpOperator.Vectorizable; @@ -11965,7 +12259,7 @@ public static Vector512 Invoke(Vector512 x) } /// T.Exp2(x) - internal readonly struct Exp2Operator : IUnaryOperator + internal readonly struct Exp2Operator : IUnaryOperator where T : IExponentialFunctions { public static bool Vectorizable => false; // TODO: Vectorize @@ -11977,7 +12271,7 @@ public static Vector512 Invoke(Vector512 x) } /// T.Exp2M1(x) - internal readonly struct Exp2M1Operator : IUnaryOperator + internal readonly struct Exp2M1Operator : IUnaryOperator where T : IExponentialFunctions { public static bool Vectorizable => Exp2Operator.Vectorizable; @@ -11989,7 +12283,7 @@ public static Vector512 Invoke(Vector512 x) } /// T.Exp10(x) - internal readonly struct Exp10Operator : IUnaryOperator + internal 
readonly struct Exp10Operator : IUnaryOperator where T : IExponentialFunctions { public static bool Vectorizable => false; // TODO: Vectorize @@ -12001,7 +12295,7 @@ public static Vector512 Invoke(Vector512 x) } /// T.Exp10M1(x) - internal readonly struct Exp10M1Operator : IUnaryOperator + internal readonly struct Exp10M1Operator : IUnaryOperator where T : IExponentialFunctions { public static bool Vectorizable => Exp2Operator.Vectorizable; @@ -12024,7 +12318,7 @@ public static Vector512 Invoke(Vector512 x) } /// T.Sqrt(x) - internal readonly struct SqrtOperator : IUnaryOperator + internal readonly struct SqrtOperator : IUnaryOperator where T : IRootFunctions { public static bool Vectorizable => true; @@ -12035,7 +12329,7 @@ public static Vector512 Invoke(Vector512 x) } /// T.Cbrt(x) - internal readonly struct CbrtOperator : IUnaryOperator + internal readonly struct CbrtOperator : IUnaryOperator where T : IRootFunctions { public static bool Vectorizable => false; // TODO: Vectorize @@ -12057,7 +12351,7 @@ public static Vector512 Invoke(Vector512 x) } /// T.Acos(x) - internal readonly struct AcosOperator : IUnaryOperator + internal readonly struct AcosOperator : IUnaryOperator where T : ITrigonometricFunctions { public static bool Vectorizable => false; // TODO: Vectorize @@ -12068,7 +12362,7 @@ public static Vector512 Invoke(Vector512 x) } /// T.Acosh(x) - internal readonly struct AcoshOperator : IUnaryOperator + internal readonly struct AcoshOperator : IUnaryOperator where T : IHyperbolicFunctions { public static bool Vectorizable => false; // TODO: Vectorize @@ -12079,7 +12373,7 @@ public static Vector512 Invoke(Vector512 x) } /// T.AcosPi(x) - internal readonly struct AcosPiOperator : IUnaryOperator + internal readonly struct AcosPiOperator : IUnaryOperator where T : ITrigonometricFunctions { public static bool Vectorizable => AcosOperator.Vectorizable; @@ -12090,7 +12384,7 @@ public static Vector512 Invoke(Vector512 x) } /// T.Asin(x) - internal readonly struct 
AsinOperator : IUnaryOperator + internal readonly struct AsinOperator : IUnaryOperator where T : ITrigonometricFunctions { public static bool Vectorizable => false; // TODO: Vectorize @@ -12101,7 +12395,7 @@ public static Vector512 Invoke(Vector512 x) } /// T.Asinh(x) - internal readonly struct AsinhOperator : IUnaryOperator + internal readonly struct AsinhOperator : IUnaryOperator where T : IHyperbolicFunctions { public static bool Vectorizable => false; // TODO: Vectorize @@ -12112,7 +12406,7 @@ public static Vector512 Invoke(Vector512 x) } /// T.AsinPi(x) - internal readonly struct AsinPiOperator : IUnaryOperator + internal readonly struct AsinPiOperator : IUnaryOperator where T : ITrigonometricFunctions { public static bool Vectorizable => AsinOperator.Vectorizable; @@ -12123,7 +12417,7 @@ public static Vector512 Invoke(Vector512 x) } /// T.Atan(x) - internal readonly struct AtanOperator : IUnaryOperator + internal readonly struct AtanOperator : IUnaryOperator where T : ITrigonometricFunctions { public static bool Vectorizable => false; // TODO: Vectorize @@ -12134,7 +12428,7 @@ public static Vector512 Invoke(Vector512 x) } /// T.Atanh(x) - internal readonly struct AtanhOperator : IUnaryOperator + internal readonly struct AtanhOperator : IUnaryOperator where T : IHyperbolicFunctions { public static bool Vectorizable => false; // TODO: Vectorize @@ -12145,7 +12439,7 @@ public static Vector512 Invoke(Vector512 x) } /// T.AtanPi(x) - internal readonly struct AtanPiOperator : IUnaryOperator + internal readonly struct AtanPiOperator : IUnaryOperator where T : ITrigonometricFunctions { public static bool Vectorizable => AtanOperator.Vectorizable; @@ -12178,7 +12472,7 @@ public static Vector512 Invoke(Vector512 x) } /// T.Cos(x) - internal readonly struct CosOperator : IUnaryOperator + internal readonly struct CosOperator : IUnaryOperator where T : ITrigonometricFunctions { public static bool Vectorizable => false; // TODO: Vectorize @@ -12189,7 +12483,7 @@ public 
static Vector512 Invoke(Vector512 x) } /// T.CosPi(x) - internal readonly struct CosPiOperator : IUnaryOperator + internal readonly struct CosPiOperator : IUnaryOperator where T : ITrigonometricFunctions { public static bool Vectorizable => CosOperator.Vectorizable; @@ -12200,7 +12494,7 @@ public static Vector512 Invoke(Vector512 x) } /// T.Cosh(x) - internal readonly struct CoshOperator : IUnaryOperator + internal readonly struct CoshOperator : IUnaryOperator where T : IHyperbolicFunctions { // This code is based on `vrs4_coshf` from amd/aocl-libm-ose @@ -12264,7 +12558,7 @@ public static Vector512 Invoke(Vector512 t) } /// T.Sin(x) - internal readonly struct SinOperator : IUnaryOperator + internal readonly struct SinOperator : IUnaryOperator where T : ITrigonometricFunctions { public static bool Vectorizable => false; // TODO: Vectorize @@ -12275,7 +12569,7 @@ public static Vector512 Invoke(Vector512 t) } /// T.SinPi(x) - internal readonly struct SinPiOperator : IUnaryOperator + internal readonly struct SinPiOperator : IUnaryOperator where T : ITrigonometricFunctions { public static bool Vectorizable => SinOperator.Vectorizable; @@ -12286,7 +12580,7 @@ public static Vector512 Invoke(Vector512 t) } /// T.Sinh(x) - internal readonly struct SinhOperator : IUnaryOperator + internal readonly struct SinhOperator : IUnaryOperator where T : IHyperbolicFunctions { // Same as cosh, but with `z -` rather than `z +`, and with the sign @@ -12339,7 +12633,7 @@ public static Vector512 Invoke(Vector512 t) } /// T.Tan(x) - internal readonly struct TanOperator : IUnaryOperator + internal readonly struct TanOperator : IUnaryOperator where T : ITrigonometricFunctions { public static bool Vectorizable => false; // TODO: Vectorize @@ -12350,7 +12644,7 @@ public static Vector512 Invoke(Vector512 t) } /// T.TanPi(x) - internal readonly struct TanPiOperator : IUnaryOperator + internal readonly struct TanPiOperator : IUnaryOperator where T : ITrigonometricFunctions { public static bool 
Vectorizable => TanOperator.Vectorizable; @@ -12361,7 +12655,7 @@ public static Vector512 Invoke(Vector512 t) } /// T.Tanh(x) - internal readonly struct TanhOperator : IUnaryOperator + internal readonly struct TanhOperator : IUnaryOperator where T : IHyperbolicFunctions { // This code is based on `vrs4_tanhf` from amd/aocl-libm-ose @@ -12424,7 +12718,7 @@ public static Vector512 Invoke(Vector512 t) } /// T.Log(x) - internal readonly struct LogOperator : IUnaryOperator + internal readonly struct LogOperator : IUnaryOperator where T : ILogarithmicFunctions { public static bool Vectorizable => (typeof(T) == typeof(double)) @@ -12510,7 +12804,7 @@ public static Vector512 Invoke(Vector512 x) #if !NET9_0_OR_GREATER /// double.Log(x) - internal readonly struct LogOperatorDouble : IUnaryOperator + internal readonly struct LogOperatorDouble : IUnaryOperator { // This code is based on `vrd2_log` from amd/aocl-libm-ose // Copyright (C) 2018-2020 Advanced Micro Devices, Inc. All rights reserved. @@ -12816,7 +13110,7 @@ public static Vector512 Invoke(Vector512 x) } /// float.Log(x) - internal readonly struct LogOperatorSingle : IUnaryOperator + internal readonly struct LogOperatorSingle : IUnaryOperator { // This code is based on `vrs4_logf` from amd/aocl-libm-ose // Copyright (C) 2018-2019 Advanced Micro Devices, Inc. All rights reserved. 
@@ -13103,7 +13397,7 @@ public static Vector512 Invoke(Vector512 x) #endif /// T.Log2(x) - internal readonly struct Log2Operator : IUnaryOperator + internal readonly struct Log2Operator : IUnaryOperator where T : ILogarithmicFunctions { public static bool Vectorizable => (typeof(T) == typeof(double)) @@ -13189,7 +13483,7 @@ public static Vector512 Invoke(Vector512 x) #if !NET9_0_OR_GREATER /// double.Log2(x) - internal readonly struct Log2OperatorDouble : IUnaryOperator + internal readonly struct Log2OperatorDouble : IUnaryOperator { // This code is based on `vrd2_log2` from amd/aocl-libm-ose // Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights reserved. @@ -13493,7 +13787,7 @@ public static Vector512 Invoke(Vector512 x) } /// float.Log2(x) - internal readonly struct Log2OperatorSingle : IUnaryOperator + internal readonly struct Log2OperatorSingle : IUnaryOperator { // This code is based on `vrs4_log2f` from amd/aocl-libm-ose // Copyright (C) 2021-2022 Advanced Micro Devices, Inc. All rights reserved. 
@@ -13775,7 +14069,7 @@ public static Vector512 Invoke(Vector512 x) #endif /// T.Log10(x) - internal readonly struct Log10Operator : IUnaryOperator + internal readonly struct Log10Operator : IUnaryOperator where T : ILogarithmicFunctions { public static bool Vectorizable => false; // TODO: Vectorize @@ -13786,7 +14080,7 @@ public static Vector512 Invoke(Vector512 x) } /// T.LogP1(x) - internal readonly struct LogP1Operator : IUnaryOperator + internal readonly struct LogP1Operator : IUnaryOperator where T : ILogarithmicFunctions { public static bool Vectorizable => LogOperator.Vectorizable; @@ -13797,7 +14091,7 @@ public static Vector512 Invoke(Vector512 x) } /// T.Log2P1(x) - internal readonly struct Log2P1Operator : IUnaryOperator + internal readonly struct Log2P1Operator : IUnaryOperator where T : ILogarithmicFunctions { public static bool Vectorizable => Log2Operator.Vectorizable; @@ -13808,7 +14102,7 @@ public static Vector512 Invoke(Vector512 x) } /// T.Log10P1(x) - internal readonly struct Log10P1Operator : IUnaryOperator + internal readonly struct Log10P1Operator : IUnaryOperator where T : ILogarithmicFunctions { public static bool Vectorizable => Log10Operator.Vectorizable; @@ -13879,7 +14173,7 @@ private static Vector512 ElementWiseSelect(Vector512 mask, Vector512 } /// 1 / (1 + T.Exp(-x)) - internal readonly struct SigmoidOperator : IUnaryOperator where T : IExponentialFunctions + internal readonly struct SigmoidOperator : IUnaryOperator where T : IExponentialFunctions { public static bool Vectorizable => typeof(T) == typeof(float); public static T Invoke(T x) => T.One / (T.One + T.Exp(-x)); @@ -13888,7 +14182,7 @@ private static Vector512 ElementWiseSelect(Vector512 mask, Vector512 public static Vector512 Invoke(Vector512 x) => Vector512.Create(T.One) / (Vector512.Create(T.One) + ExpOperator.Invoke(-x)); } - internal readonly struct CeilingOperator : IUnaryOperator where T : IFloatingPoint + internal readonly struct CeilingOperator : IUnaryOperator where 
T : IFloatingPoint { public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double); @@ -13940,7 +14234,7 @@ public static Vector512 Invoke(Vector512 x) } } - internal readonly struct FloorOperator : IUnaryOperator where T : IFloatingPoint + internal readonly struct FloorOperator : IUnaryOperator where T : IFloatingPoint { public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double); @@ -13992,7 +14286,7 @@ public static Vector512 Invoke(Vector512 x) } } - private readonly struct TruncateOperator : IUnaryOperator where T : IFloatingPoint + private readonly struct TruncateOperator : IUnaryOperator where T : IFloatingPoint { public static bool Vectorizable => typeof(T) == typeof(float) || typeof(T) == typeof(double); @@ -14071,7 +14365,7 @@ public static Vector512 Invoke(Vector512 x) } /// T.PopCount(x) - internal readonly struct PopCountOperator : IUnaryOperator where T : IBinaryInteger + internal readonly struct PopCountOperator : IUnaryOperator where T : IBinaryInteger { public static bool Vectorizable => false; // TODO: Vectorize public static T Invoke(T x) => T.PopCount(x); @@ -14081,7 +14375,7 @@ public static Vector512 Invoke(Vector512 x) } /// T.LeadingZeroCount(x) - internal readonly struct LeadingZeroCountOperator : IUnaryOperator where T : IBinaryInteger + internal readonly struct LeadingZeroCountOperator : IUnaryOperator where T : IBinaryInteger { public static bool Vectorizable => false; // TODO: Vectorize public static T Invoke(T x) => T.LeadingZeroCount(x); @@ -14091,7 +14385,7 @@ public static Vector512 Invoke(Vector512 x) } /// T.TrailingZeroCount(x) - internal readonly struct TrailingZeroCountOperator : IUnaryOperator where T : IBinaryInteger + internal readonly struct TrailingZeroCountOperator : IUnaryOperator where T : IBinaryInteger { public static bool Vectorizable => false; // TODO: Vectorize public static T Invoke(T x) => T.TrailingZeroCount(x); @@ -14192,7 +14486,7 @@ public static 
Vector512 Invoke(Vector512 x, Vector512 y) } /// T.DegreesToRadians(x) - internal readonly struct DegreesToRadiansOperator : IUnaryOperator where T : ITrigonometricFunctions + internal readonly struct DegreesToRadiansOperator : IUnaryOperator where T : ITrigonometricFunctions { public static bool Vectorizable => true; public static T Invoke(T x) => T.DegreesToRadians(x); @@ -14202,7 +14496,7 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) } /// T.RadiansToDegrees(x) - internal readonly struct RadiansToDegreesOperator : IUnaryOperator where T : ITrigonometricFunctions + internal readonly struct RadiansToDegreesOperator : IUnaryOperator where T : ITrigonometricFunctions { public static bool Vectorizable => true; public static T Invoke(T x) => T.RadiansToDegrees(x); @@ -14211,14 +14505,334 @@ public static Vector512 Invoke(Vector512 x, Vector512 y) public static Vector512 Invoke(Vector512 x) => (x * T.CreateChecked(180)) / T.Pi; } + /// T.ILogB(x) + internal readonly struct ILogBOperator : IUnaryOperator where T : IFloatingPointIeee754 + { + public static bool Vectorizable => false; // TODO: vectorize for float + + public static int Invoke(T x) => T.ILogB(x); + public static Vector128 Invoke(Vector128 x) => throw new NotImplementedException(); + public static Vector256 Invoke(Vector256 x) => throw new NotImplementedException(); + public static Vector512 Invoke(Vector512 x) => throw new NotImplementedException(); + } + + /// double.ILogB(x) + internal readonly struct ILogBDoubleOperator : IUnaryTwoToOneOperator + { + public static bool Vectorizable => false; // TODO: vectorize + + public static int Invoke(double x) => double.ILogB(x); + public static Vector128 Invoke(Vector128 lower, Vector128 upper) => throw new NotImplementedException(); + public static Vector256 Invoke(Vector256 lower, Vector256 upper) => throw new NotImplementedException(); + public static Vector512 Invoke(Vector512 lower, Vector512 upper) => throw new NotImplementedException(); + } + + 
/// T.CreateChecked(x) + internal readonly struct ConvertCheckedFallbackOperator : IUnaryOperator where TFrom : INumberBase where TTo : INumberBase + { + public static bool Vectorizable => false; + + public static TTo Invoke(TFrom x) => TTo.CreateChecked(x); + public static Vector128 Invoke(Vector128 x) => throw new NotImplementedException(); + public static Vector256 Invoke(Vector256 x) => throw new NotImplementedException(); + public static Vector512 Invoke(Vector512 x) => throw new NotImplementedException(); + } + + /// T.CreateSaturating(x) + internal readonly struct ConvertSaturatingFallbackOperator : IUnaryOperator where TFrom : INumberBase where TTo : INumberBase + { + public static bool Vectorizable => false; + + public static TTo Invoke(TFrom x) => TTo.CreateSaturating(x); + public static Vector128 Invoke(Vector128 x) => throw new NotImplementedException(); + public static Vector256 Invoke(Vector256 x) => throw new NotImplementedException(); + public static Vector512 Invoke(Vector512 x) => throw new NotImplementedException(); + } + + /// T.CreateTruncating(x) + internal readonly struct ConvertTruncatingFallbackOperator : IUnaryOperator where TFrom : INumberBase where TTo : INumberBase + { + public static bool Vectorizable => false; + + public static TTo Invoke(TFrom x) => TTo.CreateTruncating(x); + public static Vector128 Invoke(Vector128 x) => throw new NotImplementedException(); + public static Vector256 Invoke(Vector256 x) => throw new NotImplementedException(); + public static Vector512 Invoke(Vector512 x) => throw new NotImplementedException(); + } + + /// (float)uint + internal readonly struct ConvertUInt32ToSingle : IUnaryOperator + { + public static bool Vectorizable => true; + + public static float Invoke(uint x) => x; + public static Vector128 Invoke(Vector128 x) => Vector128.ConvertToSingle(x); + public static Vector256 Invoke(Vector256 x) => Vector256.ConvertToSingle(x); + public static Vector512 Invoke(Vector512 x) =>
Vector512.ConvertToSingle(x); + } + + /// (float)int + internal readonly struct ConvertInt32ToSingle : IUnaryOperator + { + public static bool Vectorizable => true; + + public static float Invoke(int x) => x; + public static Vector128 Invoke(Vector128 x) => Vector128.ConvertToSingle(x); + public static Vector256 Invoke(Vector256 x) => Vector256.ConvertToSingle(x); + public static Vector512 Invoke(Vector512 x) => Vector512.ConvertToSingle(x); + } + + /// (uint)float + internal readonly struct ConvertSingleToUInt32 : IUnaryOperator + { + public static bool Vectorizable => false; // TODO https://github.com/dotnet/runtime/pull/97529: make this true once vectorized behavior matches scalar + + public static uint Invoke(float x) => uint.CreateTruncating(x); + public static Vector128 Invoke(Vector128 x) => Vector128.ConvertToUInt32(x); + public static Vector256 Invoke(Vector256 x) => Vector256.ConvertToUInt32(x); + public static Vector512 Invoke(Vector512 x) => Vector512.ConvertToUInt32(x); + } + + /// (int)float + internal readonly struct ConvertSingleToInt32 : IUnaryOperator + { + public static bool Vectorizable => false; // TODO https://github.com/dotnet/runtime/pull/97529: make this true once vectorized behavior matches scalar + + public static int Invoke(float x) => int.CreateTruncating(x); + public static Vector128 Invoke(Vector128 x) => Vector128.ConvertToInt32(x); + public static Vector256 Invoke(Vector256 x) => Vector256.ConvertToInt32(x); + public static Vector512 Invoke(Vector512 x) => Vector512.ConvertToInt32(x); + } + + /// (double)ulong + internal readonly struct ConvertUInt64ToDouble : IUnaryOperator + { + public static bool Vectorizable => true; + + public static double Invoke(ulong x) => x; + public static Vector128 Invoke(Vector128 x) => Vector128.ConvertToDouble(x); + public static Vector256 Invoke(Vector256 x) => Vector256.ConvertToDouble(x); + public static Vector512 Invoke(Vector512 x) => Vector512.ConvertToDouble(x); + } + + /// (double)long +
internal readonly struct ConvertInt64ToDouble : IUnaryOperator + { + public static bool Vectorizable => true; + + public static double Invoke(long x) => x; + public static Vector128 Invoke(Vector128 x) => Vector128.ConvertToDouble(x); + public static Vector256 Invoke(Vector256 x) => Vector256.ConvertToDouble(x); + public static Vector512 Invoke(Vector512 x) => Vector512.ConvertToDouble(x); + } + + /// (ulong)double + internal readonly struct ConvertDoubleToUInt64 : IUnaryOperator + { + public static bool Vectorizable => false; // TODO https://github.com/dotnet/runtime/pull/97529: make this true once vectorized behavior matches scalar + + public static ulong Invoke(double x) => ulong.CreateTruncating(x); + public static Vector128 Invoke(Vector128 x) => Vector128.ConvertToUInt64(x); + public static Vector256 Invoke(Vector256 x) => Vector256.ConvertToUInt64(x); + public static Vector512 Invoke(Vector512 x) => Vector512.ConvertToUInt64(x); + } + + /// (long)double + internal readonly struct ConvertDoubleToInt64 : IUnaryOperator + { + public static bool Vectorizable => false; // TODO https://github.com/dotnet/runtime/pull/97529: make this true once vectorized behavior matches scalar + + public static long Invoke(double x) => long.CreateTruncating(x); + public static Vector128 Invoke(Vector128 x) => Vector128.ConvertToInt64(x); + public static Vector256 Invoke(Vector256 x) => Vector256.ConvertToInt64(x); + public static Vector512 Invoke(Vector512 x) => Vector512.ConvertToInt64(x); + } + + /// (double)float + internal readonly struct WidenSingleToDoubleOperator : IUnaryOneToTwoOperator + { + public static bool Vectorizable => true; + + public static double Invoke(float x) => x; + public static (Vector128 Lower, Vector128 Upper) Invoke(Vector128 x) => Vector128.Widen(x); + public static (Vector256 Lower, Vector256 Upper) Invoke(Vector256 x) => Vector256.Widen(x); + public static (Vector512 Lower, Vector512 Upper) Invoke(Vector512 x) => Vector512.Widen(x); + } + + /// 
(float)double + internal readonly struct NarrowDoubleToSingleOperator : IUnaryTwoToOneOperator + { + public static bool Vectorizable => true; + + public static float Invoke(double x) => (float)x; + public static Vector128 Invoke(Vector128 lower, Vector128 upper) => Vector128.Narrow(lower, upper); + public static Vector256 Invoke(Vector256 lower, Vector256 upper) => Vector256.Narrow(lower, upper); + public static Vector512 Invoke(Vector512 lower, Vector512 upper) => Vector512.Narrow(lower, upper); + } + + /// (ushort)byte + internal readonly struct WidenByteToUInt16Operator : IUnaryOneToTwoOperator + { + public static bool Vectorizable => true; + + public static ushort Invoke(byte x) => x; + public static (Vector128 Lower, Vector128 Upper) Invoke(Vector128 x) => Vector128.Widen(x); + public static (Vector256 Lower, Vector256 Upper) Invoke(Vector256 x) => Vector256.Widen(x); + public static (Vector512 Lower, Vector512 Upper) Invoke(Vector512 x) => Vector512.Widen(x); + } + + /// (byte)ushort + internal readonly struct NarrowUInt16ToByteOperator : IUnaryTwoToOneOperator + { + public static bool Vectorizable => true; + + public static byte Invoke(ushort x) => (byte)x; + public static Vector128 Invoke(Vector128 lower, Vector128 upper) => Vector128.Narrow(lower, upper); + public static Vector256 Invoke(Vector256 lower, Vector256 upper) => Vector256.Narrow(lower, upper); + public static Vector512 Invoke(Vector512 lower, Vector512 upper) => Vector512.Narrow(lower, upper); + } + + /// (short)sbyte + internal readonly struct WidenSByteToInt16Operator : IUnaryOneToTwoOperator + { + public static bool Vectorizable => true; + + public static short Invoke(sbyte x) => x; + public static (Vector128 Lower, Vector128 Upper) Invoke(Vector128 x) => Vector128.Widen(x); + public static (Vector256 Lower, Vector256 Upper) Invoke(Vector256 x) => Vector256.Widen(x); + public static (Vector512 Lower, Vector512 Upper) Invoke(Vector512 x) => Vector512.Widen(x); + } + + /// (sbyte)short + 
internal readonly struct NarrowInt16ToSByteOperator : IUnaryTwoToOneOperator + { + public static bool Vectorizable => true; + + public static sbyte Invoke(short x) => (sbyte)x; + public static Vector128 Invoke(Vector128 lower, Vector128 upper) => Vector128.Narrow(lower, upper); + public static Vector256 Invoke(Vector256 lower, Vector256 upper) => Vector256.Narrow(lower, upper); + public static Vector512 Invoke(Vector512 lower, Vector512 upper) => Vector512.Narrow(lower, upper); + } + + /// (uint)ushort + internal readonly struct WidenUInt16ToUInt32Operator : IUnaryOneToTwoOperator + { + public static bool Vectorizable => true; + + public static uint Invoke(ushort x) => x; + public static (Vector128 Lower, Vector128 Upper) Invoke(Vector128 x) => Vector128.Widen(x); + public static (Vector256 Lower, Vector256 Upper) Invoke(Vector256 x) => Vector256.Widen(x); + public static (Vector512 Lower, Vector512 Upper) Invoke(Vector512 x) => Vector512.Widen(x); + } + + /// (ushort)uint + internal readonly struct NarrowUInt32ToUInt16Operator : IUnaryTwoToOneOperator + { + public static bool Vectorizable => true; + + public static ushort Invoke(uint x) => (ushort)x; + public static Vector128 Invoke(Vector128 lower, Vector128 upper) => Vector128.Narrow(lower, upper); + public static Vector256 Invoke(Vector256 lower, Vector256 upper) => Vector256.Narrow(lower, upper); + public static Vector512 Invoke(Vector512 lower, Vector512 upper) => Vector512.Narrow(lower, upper); + } + + /// (int)short + internal readonly struct WidenInt16ToInt32Operator : IUnaryOneToTwoOperator + { + public static bool Vectorizable => true; + + public static int Invoke(short x) => x; + public static (Vector128 Lower, Vector128 Upper) Invoke(Vector128 x) => Vector128.Widen(x); + public static (Vector256 Lower, Vector256 Upper) Invoke(Vector256 x) => Vector256.Widen(x); + public static (Vector512 Lower, Vector512 Upper) Invoke(Vector512 x) => Vector512.Widen(x); + } + + /// (short)int + internal readonly struct 
NarrowInt32ToInt16Operator : IUnaryTwoToOneOperator + { + public static bool Vectorizable => true; + + public static short Invoke(int x) => (short)x; + public static Vector128 Invoke(Vector128 lower, Vector128 upper) => Vector128.Narrow(lower, upper); + public static Vector256 Invoke(Vector256 lower, Vector256 upper) => Vector256.Narrow(lower, upper); + public static Vector512 Invoke(Vector512 lower, Vector512 upper) => Vector512.Narrow(lower, upper); + } + + /// (ulong)uint + internal readonly struct WidenUInt32ToUInt64Operator : IUnaryOneToTwoOperator + { + public static bool Vectorizable => true; + + public static ulong Invoke(uint x) => x; + public static (Vector128 Lower, Vector128 Upper) Invoke(Vector128 x) => Vector128.Widen(x); + public static (Vector256 Lower, Vector256 Upper) Invoke(Vector256 x) => Vector256.Widen(x); + public static (Vector512 Lower, Vector512 Upper) Invoke(Vector512 x) => Vector512.Widen(x); + } + + /// (uint)ulong + internal readonly struct NarrowUInt64ToUInt32Operator : IUnaryTwoToOneOperator + { + public static bool Vectorizable => true; + + public static uint Invoke(ulong x) => (uint)x; + public static Vector128 Invoke(Vector128 lower, Vector128 upper) => Vector128.Narrow(lower, upper); + public static Vector256 Invoke(Vector256 lower, Vector256 upper) => Vector256.Narrow(lower, upper); + public static Vector512 Invoke(Vector512 lower, Vector512 upper) => Vector512.Narrow(lower, upper); + } + + /// (long)int + internal readonly struct WidenInt32ToInt64Operator : IUnaryOneToTwoOperator + { + public static bool Vectorizable => true; + + public static long Invoke(int x) => x; + public static (Vector128 Lower, Vector128 Upper) Invoke(Vector128 x) => Vector128.Widen(x); + public static (Vector256 Lower, Vector256 Upper) Invoke(Vector256 x) => Vector256.Widen(x); + public static (Vector512 Lower, Vector512 Upper) Invoke(Vector512 x) => Vector512.Widen(x); + } + + /// (int)long + internal readonly struct NarrowInt64ToInt32Operator : 
IUnaryTwoToOneOperator + { + public static bool Vectorizable => true; + + public static int Invoke(long x) => (int)x; + public static Vector128 Invoke(Vector128 lower, Vector128 upper) => Vector128.Narrow(lower, upper); + public static Vector256 Invoke(Vector256 lower, Vector256 upper) => Vector256.Narrow(lower, upper); + public static Vector512 Invoke(Vector512 lower, Vector512 upper) => Vector512.Narrow(lower, upper); + } + + /// Operator that takes one input value and returns a single value. + /// The input and output type must be of the same size if vectorization is desired. + private interface IUnaryOperator + { + static abstract bool Vectorizable { get; } + static abstract TOutput Invoke(TInput x); + static abstract Vector128 Invoke(Vector128 x); + static abstract Vector256 Invoke(Vector256 x); + static abstract Vector512 Invoke(Vector512 x); + } + + /// Operator that takes one input value and returns a single value. + /// The input type must be half the size of the output type. + private interface IUnaryOneToTwoOperator + { + static abstract bool Vectorizable { get; } + static abstract TOutput Invoke(TInput x); + static abstract (Vector128 Lower, Vector128 Upper) Invoke(Vector128 x); + static abstract (Vector256 Lower, Vector256 Upper) Invoke(Vector256 x); + static abstract (Vector512 Lower, Vector512 Upper) Invoke(Vector512 x); + } + /// Operator that takes one input value and returns a single value. - private interface IUnaryOperator + /// The input type must be twice the size of the output type. 
+ private interface IUnaryTwoToOneOperator { static abstract bool Vectorizable { get; } - static abstract T Invoke(T x); - static abstract Vector128 Invoke(Vector128 x); - static abstract Vector256 Invoke(Vector256 x); - static abstract Vector512 Invoke(Vector512 x); + static abstract TOutput Invoke(TInput x); + static abstract Vector128 Invoke(Vector128 lower, Vector128 upper); + static abstract Vector256 Invoke(Vector256 lower, Vector256 upper); + static abstract Vector512 Invoke(Vector512 lower, Vector512 upper); } /// Operator that takes two input values and returns a single value. diff --git a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs index b44036b1a74628..4d1c22a402e54e 100644 --- a/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs +++ b/src/libraries/System.Numerics.Tensors/tests/TensorPrimitives.Generic.cs @@ -4,6 +4,7 @@ using System.Buffers; using System.Collections.Generic; using System.Linq; +using System.Reflection; using System.Runtime.InteropServices; using Xunit; using Xunit.Sdk; @@ -15,6 +16,198 @@ namespace System.Numerics.Tensors.Tests { + public class ConvertTests + { + [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNotBuiltWithAggressiveTrimming))] + public void ConvertTruncatingAndSaturating() + { + MethodInfo convertTruncatingImpl = typeof(ConvertTests).GetMethod(nameof(ConvertTruncatingImpl), BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Static | BindingFlags.Instance); + Assert.NotNull(convertTruncatingImpl); + + MethodInfo convertSaturatingImpl = typeof(ConvertTests).GetMethod(nameof(ConvertSaturatingImpl), BindingFlags.Public | BindingFlags.NonPublic | BindingFlags.Static | BindingFlags.Instance); + Assert.NotNull(convertSaturatingImpl); + + Type[] types = + [ + typeof(sbyte), typeof(byte), + typeof(short), typeof(ushort), typeof(char), + typeof(int), typeof(uint), + 
typeof(long), typeof(ulong), + typeof(nint), typeof(nuint), + typeof(Half), typeof(float), typeof(double), typeof(NFloat), + typeof(Int128), typeof(UInt128), + ]; + + foreach (Type from in types) + { + foreach (Type to in types) + { + convertTruncatingImpl.MakeGenericMethod(from, to).Invoke(null, null); + convertSaturatingImpl.MakeGenericMethod(from, to).Invoke(null, null); + } + } + } + + [Fact] + public void ConvertChecked() + { + // Conversions that never overflow. This isn't an exhaustive list; just a sampling. + ConvertCheckedImpl(); + ConvertCheckedImpl(); + ConvertCheckedImpl(); + ConvertCheckedImpl(); + ConvertCheckedImpl(); + ConvertCheckedImpl(); + ConvertCheckedImpl(); + ConvertCheckedImpl(); + ConvertCheckedImpl(); + ConvertCheckedImpl(); + ConvertCheckedImpl(); + ConvertCheckedImpl(); + ConvertCheckedImpl(); + + // Conversions that may overflow. This isn't an exhaustive list; just a sampling. + ConvertCheckedImpl(42f, float.MaxValue); + ConvertCheckedImpl(42, int.MaxValue + 1L); + } + + private static void ConvertTruncatingImpl() + where TFrom : unmanaged, INumber + where TTo : unmanaged, INumber + { + AssertExtensions.Throws("destination", () => TensorPrimitives.ConvertTruncating(new TFrom[3], new TTo[2])); + + foreach (int tensorLength in Helpers.TensorLengthsIncluding0) + { + using BoundedMemory source = BoundedMemory.Allocate(tensorLength); + using BoundedMemory destination = BoundedMemory.Allocate(tensorLength); + + Random rand = new(42); + Span sourceSpan = source.Span; + for (int i = 0; i < tensorLength; i++) + { + sourceSpan[i] = TFrom.CreateTruncating(new Int128( + (ulong)rand.NextInt64(long.MinValue, long.MaxValue), + (ulong)rand.NextInt64(long.MinValue, long.MaxValue))); + } + + TensorPrimitives.ConvertTruncating(source.Span, destination.Span); + + for (int i = 0; i < tensorLength; i++) + { + if (!IsEqualWithTolerance(TTo.CreateTruncating(source.Span[i]), destination.Span[i])) + { + throw new XunitException($"{typeof(TFrom).Name} => 
{typeof(TTo).Name}. Input: {source.Span[i]}. Actual: {destination.Span[i]}. Expected: {TTo.CreateTruncating(source.Span[i])}."); + } + } + }; + } + + private static void ConvertSaturatingImpl() + where TFrom : unmanaged, INumber + where TTo : unmanaged, INumber + { + AssertExtensions.Throws("destination", () => TensorPrimitives.ConvertSaturating(new TFrom[3], new TTo[2])); + + foreach (int tensorLength in Helpers.TensorLengthsIncluding0) + { + using BoundedMemory source = BoundedMemory.Allocate(tensorLength); + using BoundedMemory destination = BoundedMemory.Allocate(tensorLength); + + Random rand = new(42); + Span sourceSpan = source.Span; + for (int i = 0; i < tensorLength; i++) + { + sourceSpan[i] = TFrom.CreateTruncating(new Int128( + (ulong)rand.NextInt64(long.MinValue, long.MaxValue), + (ulong)rand.NextInt64(long.MinValue, long.MaxValue))); + } + + TensorPrimitives.ConvertSaturating(source.Span, destination.Span); + + for (int i = 0; i < tensorLength; i++) + { + if (!IsEqualWithTolerance(TTo.CreateSaturating(source.Span[i]), destination.Span[i])) + { + throw new XunitException($"{typeof(TFrom).Name} => {typeof(TTo).Name}. Input: {source.Span[i]}. Actual: {destination.Span[i]}. 
Expected: {TTo.CreateSaturating(source.Span[i])}."); + } + } + }; + } + + private static void ConvertCheckedImpl() + where TFrom : unmanaged, INumber + where TTo : unmanaged, INumber + { + AssertExtensions.Throws("destination", () => TensorPrimitives.ConvertChecked(new TFrom[3], new TTo[2])); + + foreach (int tensorLength in Helpers.TensorLengthsIncluding0) + { + using BoundedMemory source = BoundedMemory.Allocate(tensorLength); + using BoundedMemory destination = BoundedMemory.Allocate(tensorLength); + + Random rand = new(42); + Span sourceSpan = source.Span; + for (int i = 0; i < tensorLength; i++) + { + sourceSpan[i] = TFrom.CreateTruncating(new Int128( + (ulong)rand.NextInt64(long.MinValue, long.MaxValue), + (ulong)rand.NextInt64(long.MinValue, long.MaxValue))); + } + + TensorPrimitives.ConvertChecked(source.Span, destination.Span); + + for (int i = 0; i < tensorLength; i++) + { + if (!IsEqualWithTolerance(TTo.CreateChecked(source.Span[i]), destination.Span[i])) + { + throw new XunitException($"{typeof(TFrom).Name} => {typeof(TTo).Name}. Input: {source.Span[i]}. Actual: {destination.Span[i]}. 
Expected: {TTo.CreateChecked(source.Span[i])}."); + } + } + }; + } + + private static void ConvertCheckedImpl(TFrom valid, TFrom invalid) + where TFrom : unmanaged, INumber + where TTo : unmanaged, INumber + { + foreach (int tensorLength in Helpers.TensorLengths) + { + using BoundedMemory source = BoundedMemory.Allocate(tensorLength); + using BoundedMemory destination = BoundedMemory.Allocate(tensorLength); + + // Test with valid + source.Span.Fill(valid); + TensorPrimitives.ConvertChecked(source.Span, destination.Span); + foreach (TTo result in destination.Span) + { + Assert.True(IsEqualWithTolerance(TTo.CreateChecked(valid), result)); + } + + // Test with at least one invalid + foreach (int invalidPosition in new[] { 0, tensorLength / 2, tensorLength - 1 }) + { + source.Span.Fill(valid); + source.Span[invalidPosition] = invalid; + Assert.Throws(() => TensorPrimitives.ConvertChecked(source.Span, destination.Span)); + } + }; + } + + private static bool IsEqualWithTolerance(T expected, T actual, T? 
tolerance = null) where T : unmanaged, INumber + { + tolerance ??= T.CreateTruncating(0.0001); + + T diff = T.Abs(expected - actual); + if (diff > tolerance && diff > T.Max(T.Abs(expected), T.Abs(actual)) * tolerance) + { + return false; + } + + return true; + } + } + public class DoubleGenericTensorPrimitives : GenericFloatingPointNumberTensorPrimitivesTests { } public class SingleGenericTensorPrimitives : GenericFloatingPointNumberTensorPrimitivesTests { } public class HalfGenericTensorPrimitives : GenericFloatingPointNumberTensorPrimitivesTests From f7972c541285a4c4d1b93695bf37c041b6b2f4ce Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 26 Jan 2024 21:40:29 -0500 Subject: [PATCH 2/4] Fix auto-indentation --- .../netcore/TensorPrimitives.netcore.cs | 4094 ++++++++--------- 1 file changed, 2047 insertions(+), 2047 deletions(-) diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs index 7cada40c624203..16555a4e98f29e 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs @@ -999,61 +999,61 @@ static T Vectorized128(ref T xRef, nuint remainder) switch (blocks) { case 7: - { - Vector128 vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 7))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 6; - } + { + Vector128 vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 7))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 6; + } case 6: - { - Vector128 vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 6))); - vresult = 
TAggregationOperator.Invoke(vresult, vector); - goto case 5; - } + { + Vector128 vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 6))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 5; + } case 5: - { - Vector128 vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 5))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 4; - } + { + Vector128 vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 5))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 4; + } case 4: - { - Vector128 vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 4))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 3; - } + { + Vector128 vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 4))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 3; + } case 3: - { - Vector128 vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 3))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 2; - } + { + Vector128 vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 3))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 2; + } case 2: - { - Vector128 vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 2))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 1; - } + { + Vector128 vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 2))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 1; + } case 1: - { - Vector128 vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref 
xRef, remainder - (uint)(Vector128.Count * 1))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 0; - } + { + Vector128 vector = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 1))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 0; + } case 0: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end = Vector128.ConditionalSelect(CreateRemainderMaskVector128((int)trailing), end, Vector128.Create(TAggregationOperator.IdentityValue)); - vresult = TAggregationOperator.Invoke(vresult, end); - break; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end = Vector128.ConditionalSelect(CreateRemainderMaskVector128((int)trailing), end, Vector128.Create(TAggregationOperator.IdentityValue)); + vresult = TAggregationOperator.Invoke(vresult, end); + break; + } } return TAggregationOperator.Invoke(vresult); @@ -1168,61 +1168,61 @@ static T Vectorized256(ref T xRef, nuint remainder) switch (blocks) { case 7: - { - Vector256 vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 7))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 6; - } + { + Vector256 vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 7))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 6; + } case 6: - { - Vector256 vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 6))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 5; - } + { + Vector256 vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 6))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 5; + } case 5: - { - Vector256 vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, 
remainder - (uint)(Vector256.Count * 5))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 4; - } + { + Vector256 vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 5))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 4; + } case 4: - { - Vector256 vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 4))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 3; - } + { + Vector256 vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 4))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 3; + } case 3: - { - Vector256 vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 3))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 2; - } + { + Vector256 vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 3))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 2; + } case 2: - { - Vector256 vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 2))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 1; - } + { + Vector256 vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 2))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 1; + } case 1: - { - Vector256 vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 1))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 0; - } + { + Vector256 vector = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 1))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 0; + } case 0: - { - // Store the 
last block, which includes any elements that wouldn't fill a full vector - end = Vector256.ConditionalSelect(CreateRemainderMaskVector256((int)trailing), end, Vector256.Create(TAggregationOperator.IdentityValue)); - vresult = TAggregationOperator.Invoke(vresult, end); - break; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end = Vector256.ConditionalSelect(CreateRemainderMaskVector256((int)trailing), end, Vector256.Create(TAggregationOperator.IdentityValue)); + vresult = TAggregationOperator.Invoke(vresult, end); + break; + } } return TAggregationOperator.Invoke(vresult); @@ -1337,61 +1337,61 @@ static T Vectorized512(ref T xRef, nuint remainder) switch (blocks) { case 7: - { - Vector512 vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 7))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 6; - } + { + Vector512 vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 7))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 6; + } case 6: - { - Vector512 vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 6))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 5; - } + { + Vector512 vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 6))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 5; + } case 5: - { - Vector512 vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 5))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 4; - } + { + Vector512 vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 5))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 4; + } case 4: - { - Vector512 vector = 
TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 4))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 3; - } + { + Vector512 vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 4))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 3; + } case 3: - { - Vector512 vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 3))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 2; - } + { + Vector512 vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 3))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 2; + } case 2: - { - Vector512 vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 2))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 1; - } + { + Vector512 vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 2))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 1; + } case 1: - { - Vector512 vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 1))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 0; - } + { + Vector512 vector = TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 1))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 0; + } case 0: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end = Vector512.ConditionalSelect(CreateRemainderMaskVector512((int)trailing), end, Vector512.Create(TAggregationOperator.IdentityValue)); - vresult = TAggregationOperator.Invoke(vresult, end); - break; - } + { + // Store the last block, which includes any elements that wouldn't 
fill a full vector + end = Vector512.ConditionalSelect(CreateRemainderMaskVector512((int)trailing), end, Vector512.Create(TAggregationOperator.IdentityValue)); + vresult = TAggregationOperator.Invoke(vresult, end); + break; + } } return TAggregationOperator.Invoke(vresult); @@ -1426,87 +1426,87 @@ static T VectorizedSmall4(ref T xRef, nuint remainder) case 11: case 10: case 9: - { - Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); + { + Debug.Assert(Vector256.IsHardwareAccelerated); + Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); - Vector256 beg = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); - Vector256 end = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count)); + Vector256 beg = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); + Vector256 end = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count)); - end = Vector256.ConditionalSelect(CreateRemainderMaskVector256((int)(remainder % (uint)Vector256.Count)), end, Vector256.Create(TAggregationOperator.IdentityValue)); + end = Vector256.ConditionalSelect(CreateRemainderMaskVector256((int)(remainder % (uint)Vector256.Count)), end, Vector256.Create(TAggregationOperator.IdentityValue)); - vresult = TAggregationOperator.Invoke(vresult, beg); - vresult = TAggregationOperator.Invoke(vresult, end); + vresult = TAggregationOperator.Invoke(vresult, beg); + vresult = TAggregationOperator.Invoke(vresult, end); - result = TAggregationOperator.Invoke(vresult); - break; - } + result = TAggregationOperator.Invoke(vresult); + break; + } case 8: - { - Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); + { + Debug.Assert(Vector256.IsHardwareAccelerated); + Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); - Vector256 beg = 
TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); - vresult = TAggregationOperator.Invoke(vresult, beg); + Vector256 beg = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); + vresult = TAggregationOperator.Invoke(vresult, beg); - result = TAggregationOperator.Invoke(vresult); - break; - } + result = TAggregationOperator.Invoke(vresult); + break; + } case 7: case 6: case 5: - { - Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); + { + Debug.Assert(Vector128.IsHardwareAccelerated); + Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); - Vector128 beg = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); - Vector128 end = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count)); + Vector128 beg = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); + Vector128 end = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count)); - end = Vector128.ConditionalSelect(CreateRemainderMaskVector128((int)(remainder % (uint)Vector128.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue)); + end = Vector128.ConditionalSelect(CreateRemainderMaskVector128((int)(remainder % (uint)Vector128.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue)); - vresult = TAggregationOperator.Invoke(vresult, beg); - vresult = TAggregationOperator.Invoke(vresult, end); + vresult = TAggregationOperator.Invoke(vresult, beg); + vresult = TAggregationOperator.Invoke(vresult, end); - result = TAggregationOperator.Invoke(vresult); - break; - } + result = TAggregationOperator.Invoke(vresult); + break; + } case 4: - { - Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); + { + Debug.Assert(Vector128.IsHardwareAccelerated); + Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); - Vector128 beg = 
TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); - vresult = TAggregationOperator.Invoke(vresult, beg); + Vector128 beg = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); + vresult = TAggregationOperator.Invoke(vresult, beg); - result = TAggregationOperator.Invoke(vresult); - break; - } + result = TAggregationOperator.Invoke(vresult); + break; + } case 3: - { - result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 2))); - goto case 2; - } + { + result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 2))); + goto case 2; + } case 2: - { - result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 1))); - goto case 1; - } + { + result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(Unsafe.Add(ref xRef, 1))); + goto case 1; + } case 1: - { - result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(xRef)); - goto case 0; - } + { + result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(xRef)); + goto case 0; + } case 0: - { - break; - } + { + break; + } } return result; @@ -1523,73 +1523,73 @@ static T VectorizedSmall8(ref T xRef, nuint remainder) case 7: case 6: case 5: - { - Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); + { + Debug.Assert(Vector256.IsHardwareAccelerated); + Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); - Vector256 beg = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); - Vector256 end = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count)); + Vector256 beg = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); + Vector256 end = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count)); - end = Vector256.ConditionalSelect(CreateRemainderMaskVector256((int)(remainder % (uint)Vector256.Count)), 
end, Vector256.Create(TAggregationOperator.IdentityValue)); + end = Vector256.ConditionalSelect(CreateRemainderMaskVector256((int)(remainder % (uint)Vector256.Count)), end, Vector256.Create(TAggregationOperator.IdentityValue)); - vresult = TAggregationOperator.Invoke(vresult, beg); - vresult = TAggregationOperator.Invoke(vresult, end); + vresult = TAggregationOperator.Invoke(vresult, beg); + vresult = TAggregationOperator.Invoke(vresult, end); - result = TAggregationOperator.Invoke(vresult); - break; - } + result = TAggregationOperator.Invoke(vresult); + break; + } case 4: - { - Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); + { + Debug.Assert(Vector256.IsHardwareAccelerated); + Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); - Vector256 beg = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); - vresult = TAggregationOperator.Invoke(vresult, beg); + Vector256 beg = TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); + vresult = TAggregationOperator.Invoke(vresult, beg); - result = TAggregationOperator.Invoke(vresult); - break; - } + result = TAggregationOperator.Invoke(vresult); + break; + } case 3: - { - Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); + { + Debug.Assert(Vector128.IsHardwareAccelerated); + Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); - Vector128 beg = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); - Vector128 end = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count)); + Vector128 beg = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); + Vector128 end = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count)); - end = Vector128.ConditionalSelect(CreateRemainderMaskVector128((int)(remainder % (uint)Vector128.Count)), end, 
Vector128.Create(TAggregationOperator.IdentityValue)); + end = Vector128.ConditionalSelect(CreateRemainderMaskVector128((int)(remainder % (uint)Vector128.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue)); - vresult = TAggregationOperator.Invoke(vresult, beg); - vresult = TAggregationOperator.Invoke(vresult, end); + vresult = TAggregationOperator.Invoke(vresult, beg); + vresult = TAggregationOperator.Invoke(vresult, end); - result = TAggregationOperator.Invoke(vresult); - break; - } + result = TAggregationOperator.Invoke(vresult); + break; + } case 2: - { - Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); + { + Debug.Assert(Vector128.IsHardwareAccelerated); + Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); - Vector128 beg = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); - vresult = TAggregationOperator.Invoke(vresult, beg); + Vector128 beg = TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); + vresult = TAggregationOperator.Invoke(vresult, beg); - result = TAggregationOperator.Invoke(vresult); - break; - } + result = TAggregationOperator.Invoke(vresult); + break; + } case 1: - { - result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(xRef)); - goto case 0; - } + { + result = TAggregationOperator.Invoke(result, TTransformOperator.Invoke(xRef)); + goto case 0; + } case 0: - { - break; - } + { + break; + } } return result; @@ -1826,68 +1826,68 @@ static T Vectorized128(ref T xRef, ref T yRef, nuint remainder) switch (blocks) { case 7: - { - Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 7)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 7))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 6; - } + { + Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 
7)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 7))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 6; + } case 6: - { - Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 6)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 6))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 5; - } + { + Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 6)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 6))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 5; + } case 5: - { - Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 5)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 5))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 4; - } + { + Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 5)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 5))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 4; + } case 4: - { - Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 4)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 4))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 3; - } + { + Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 4)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 4))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 3; + } case 3: - { - Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 3)), - Vector128.LoadUnsafe(ref yRef, remainder - 
(uint)(Vector128.Count * 3))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 2; - } + { + Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 3)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 3))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 2; + } case 2: - { - Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 2)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 2))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 1; - } + { + Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 2)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 2))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 1; + } case 1: - { - Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 1)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 1))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 0; - } + { + Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 1)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 1))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 0; + } case 0: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end = Vector128.ConditionalSelect(CreateRemainderMaskVector128((int)trailing), end, Vector128.Create(TAggregationOperator.IdentityValue)); - vresult = TAggregationOperator.Invoke(vresult, end); - break; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end = Vector128.ConditionalSelect(CreateRemainderMaskVector128((int)trailing), end, 
Vector128.Create(TAggregationOperator.IdentityValue)); + vresult = TAggregationOperator.Invoke(vresult, end); + break; + } } return TAggregationOperator.Invoke(vresult); @@ -2017,68 +2017,68 @@ static T Vectorized256(ref T xRef, ref T yRef, nuint remainder) switch (blocks) { case 7: - { - Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 7)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 7))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 6; - } + { + Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 7)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 7))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 6; + } case 6: - { - Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 6)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 6))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 5; - } + { + Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 6)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 6))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 5; + } case 5: - { - Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 5)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 5))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 4; - } + { + Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 5)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 5))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 4; + } case 4: - { - Vector256 vector = 
TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 4)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 4))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 3; - } + { + Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 4)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 4))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 3; + } case 3: - { - Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 3)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 3))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 2; - } + { + Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 3)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 3))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 2; + } case 2: - { - Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 2)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 2))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 1; - } + { + Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 2)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 2))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 1; + } case 1: - { - Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 1)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 1))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 0; - } + { + Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, 
remainder - (uint)(Vector256.Count * 1)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 1))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 0; + } case 0: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end = Vector256.ConditionalSelect(CreateRemainderMaskVector256((int)trailing), end, Vector256.Create(TAggregationOperator.IdentityValue)); - vresult = TAggregationOperator.Invoke(vresult, end); - break; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end = Vector256.ConditionalSelect(CreateRemainderMaskVector256((int)trailing), end, Vector256.Create(TAggregationOperator.IdentityValue)); + vresult = TAggregationOperator.Invoke(vresult, end); + break; + } } return TAggregationOperator.Invoke(vresult); @@ -2208,68 +2208,68 @@ static T Vectorized512(ref T xRef, ref T yRef, nuint remainder) switch (blocks) { case 7: - { - Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 7)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 7))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 6; - } + { + Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 7)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 7))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 6; + } case 6: - { - Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 6)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 6))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 5; - } + { + Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 6)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 6))); + 
vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 5; + } case 5: - { - Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 5)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 5))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 4; - } + { + Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 5)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 5))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 4; + } case 4: - { - Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 4)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 4))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 3; - } + { + Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 4)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 4))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 3; + } case 3: - { - Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 3)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 3))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 2; - } + { + Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 3)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 3))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 2; + } case 2: - { - Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 2)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 2))); - vresult = TAggregationOperator.Invoke(vresult, 
vector); - goto case 1; - } + { + Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 2)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 2))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 1; + } case 1: - { - Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 1)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 1))); - vresult = TAggregationOperator.Invoke(vresult, vector); - goto case 0; - } + { + Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 1)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 1))); + vresult = TAggregationOperator.Invoke(vresult, vector); + goto case 0; + } case 0: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end = Vector512.ConditionalSelect(CreateRemainderMaskVector512((int)trailing), end, Vector512.Create(TAggregationOperator.IdentityValue)); - vresult = TAggregationOperator.Invoke(vresult, end); - break; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end = Vector512.ConditionalSelect(CreateRemainderMaskVector512((int)trailing), end, Vector512.Create(TAggregationOperator.IdentityValue)); + vresult = TAggregationOperator.Invoke(vresult, end); + break; + } } return TAggregationOperator.Invoke(vresult); @@ -2304,95 +2304,95 @@ static T VectorizedSmall4(ref T xRef, ref T yRef, nuint remainder) case 11: case 10: case 9: - { - Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); + { + Debug.Assert(Vector256.IsHardwareAccelerated); + Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); - Vector256 beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - Vector256.LoadUnsafe(ref yRef)); - 
Vector256 end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256.Count)); + Vector256 beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef)); + Vector256 end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256.Count)); - end = Vector256.ConditionalSelect(CreateRemainderMaskVector256((int)(remainder % (uint)Vector256.Count)), end, Vector256.Create(TAggregationOperator.IdentityValue)); + end = Vector256.ConditionalSelect(CreateRemainderMaskVector256((int)(remainder % (uint)Vector256.Count)), end, Vector256.Create(TAggregationOperator.IdentityValue)); - vresult = TAggregationOperator.Invoke(vresult, beg); - vresult = TAggregationOperator.Invoke(vresult, end); + vresult = TAggregationOperator.Invoke(vresult, beg); + vresult = TAggregationOperator.Invoke(vresult, end); - result = TAggregationOperator.Invoke(vresult); - break; - } + result = TAggregationOperator.Invoke(vresult); + break; + } case 8: - { - Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); + { + Debug.Assert(Vector256.IsHardwareAccelerated); + Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); - Vector256 beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - Vector256.LoadUnsafe(ref yRef)); - vresult = TAggregationOperator.Invoke(vresult, beg); + Vector256 beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef)); + vresult = TAggregationOperator.Invoke(vresult, beg); - result = TAggregationOperator.Invoke(vresult); - break; - } + result = TAggregationOperator.Invoke(vresult); + break; + } case 7: case 6: case 5: - { - Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 vresult = 
Vector128.Create(TAggregationOperator.IdentityValue); + { + Debug.Assert(Vector128.IsHardwareAccelerated); + Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); - Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef)); - Vector128 end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count)); + Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef)); + Vector128 end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count)); - end = Vector128.ConditionalSelect(CreateRemainderMaskVector128((int)(remainder % (uint)Vector128.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue)); + end = Vector128.ConditionalSelect(CreateRemainderMaskVector128((int)(remainder % (uint)Vector128.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue)); - vresult = TAggregationOperator.Invoke(vresult, beg); - vresult = TAggregationOperator.Invoke(vresult, end); + vresult = TAggregationOperator.Invoke(vresult, beg); + vresult = TAggregationOperator.Invoke(vresult, end); - result = TAggregationOperator.Invoke(vresult); - break; - } + result = TAggregationOperator.Invoke(vresult); + break; + } case 4: - { - Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); + { + Debug.Assert(Vector128.IsHardwareAccelerated); + Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); - Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef)); - vresult = TAggregationOperator.Invoke(vresult, beg); + Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef)); + vresult = 
TAggregationOperator.Invoke(vresult, beg); - result = TAggregationOperator.Invoke(vresult); - break; - } + result = TAggregationOperator.Invoke(vresult); + break; + } case 3: - { - result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 2), - Unsafe.Add(ref yRef, 2))); - goto case 2; - } + { + result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 2), + Unsafe.Add(ref yRef, 2))); + goto case 2; + } case 2: - { - result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 1), - Unsafe.Add(ref yRef, 1))); - goto case 1; - } + { + result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 1), + Unsafe.Add(ref yRef, 1))); + goto case 1; + } case 1: - { - result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(xRef, yRef)); - goto case 0; - } + { + result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(xRef, yRef)); + goto case 0; + } case 0: - { - break; - } + { + break; + } } return result; @@ -2409,79 +2409,79 @@ static T VectorizedSmall8(ref T xRef, ref T yRef, nuint remainder) case 7: case 6: case 5: - { - Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); + { + Debug.Assert(Vector256.IsHardwareAccelerated); + Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); - Vector256 beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - Vector256.LoadUnsafe(ref yRef)); - Vector256 end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256.Count)); + Vector256 beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef)); + Vector256 end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256.Count)); - 
end = Vector256.ConditionalSelect(CreateRemainderMaskVector256((int)(remainder % (uint)Vector256.Count)), end, Vector256.Create(TAggregationOperator.IdentityValue)); + end = Vector256.ConditionalSelect(CreateRemainderMaskVector256((int)(remainder % (uint)Vector256.Count)), end, Vector256.Create(TAggregationOperator.IdentityValue)); - vresult = TAggregationOperator.Invoke(vresult, beg); - vresult = TAggregationOperator.Invoke(vresult, end); + vresult = TAggregationOperator.Invoke(vresult, beg); + vresult = TAggregationOperator.Invoke(vresult, end); - result = TAggregationOperator.Invoke(vresult); - break; - } + result = TAggregationOperator.Invoke(vresult); + break; + } case 4: - { - Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); + { + Debug.Assert(Vector256.IsHardwareAccelerated); + Vector256 vresult = Vector256.Create(TAggregationOperator.IdentityValue); - Vector256 beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - Vector256.LoadUnsafe(ref yRef)); - vresult = TAggregationOperator.Invoke(vresult, beg); + Vector256 beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef)); + vresult = TAggregationOperator.Invoke(vresult, beg); - result = TAggregationOperator.Invoke(vresult); - break; - } + result = TAggregationOperator.Invoke(vresult); + break; + } case 3: - { - Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); + { + Debug.Assert(Vector128.IsHardwareAccelerated); + Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); - Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef)); - Vector128 end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count)); + Vector128 beg = 
TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef)); + Vector128 end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count)); - end = Vector128.ConditionalSelect(CreateRemainderMaskVector128((int)(remainder % (uint)Vector128.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue)); + end = Vector128.ConditionalSelect(CreateRemainderMaskVector128((int)(remainder % (uint)Vector128.Count)), end, Vector128.Create(TAggregationOperator.IdentityValue)); - vresult = TAggregationOperator.Invoke(vresult, beg); - vresult = TAggregationOperator.Invoke(vresult, end); + vresult = TAggregationOperator.Invoke(vresult, beg); + vresult = TAggregationOperator.Invoke(vresult, end); - result = TAggregationOperator.Invoke(vresult); - break; - } + result = TAggregationOperator.Invoke(vresult); + break; + } case 2: - { - Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); + { + Debug.Assert(Vector128.IsHardwareAccelerated); + Vector128 vresult = Vector128.Create(TAggregationOperator.IdentityValue); - Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef)); - vresult = TAggregationOperator.Invoke(vresult, beg); + Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef)); + vresult = TAggregationOperator.Invoke(vresult, beg); - result = TAggregationOperator.Invoke(vresult); - break; - } + result = TAggregationOperator.Invoke(vresult); + break; + } case 1: - { - result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(xRef, yRef)); - goto case 0; - } + { + result = TAggregationOperator.Invoke(result, TBinaryOperator.Invoke(xRef, yRef)); + goto case 0; + } case 0: - { - break; - } + { + break; + } } return result; @@ -3275,67 +3275,67 @@ static void Vectorized128(ref 
TInput xRef, ref TOutput dRef, nuint remainder) switch (remainder / (uint)Vector128.Count) { case 8: - { - Vector128 vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 8))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 8)); - goto case 7; - } + { + Vector128 vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 8))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 8)); + goto case 7; + } case 7: - { - Vector128 vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 7))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 7)); - goto case 6; - } + { + Vector128 vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 7))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 7)); + goto case 6; + } case 6: - { - Vector128 vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 6))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 6)); - goto case 5; - } + { + Vector128 vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 6))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 6)); + goto case 5; + } case 5: - { - Vector128 vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 5))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 5)); - goto case 4; - } + { + Vector128 vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 5))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 5)); + goto case 4; + } case 4: - { - Vector128 vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 4))); - vector.StoreUnsafe(ref dRef, remainder 
- (uint)(Vector128.Count * 4)); - goto case 3; - } + { + Vector128 vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 4))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 4)); + goto case 3; + } case 3: - { - Vector128 vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 3))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 3)); - goto case 2; - } + { + Vector128 vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 3))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 3)); + goto case 2; + } case 2: - { - Vector128 vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 2))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 2)); - goto case 1; - } + { + Vector128 vector = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 2))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 2)); + goto case 1; + } case 1: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128.Count); - goto case 0; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128.Count); + goto case 0; + } case 0: - { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); - break; - } + { + // Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } } } @@ -3486,67 +3486,67 @@ static void Vectorized256(ref TInput xRef, ref TOutput dRef, nuint remainder) switch (remainder / (uint)Vector256.Count) { case 8: - { - Vector256 vector = 
TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 8))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 8)); - goto case 7; - } + { + Vector256 vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 8))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 8)); + goto case 7; + } case 7: - { - Vector256 vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 7))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 7)); - goto case 6; - } + { + Vector256 vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 7))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 7)); + goto case 6; + } case 6: - { - Vector256 vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 6))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 6)); - goto case 5; - } + { + Vector256 vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 6))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 6)); + goto case 5; + } case 5: - { - Vector256 vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 5))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 5)); - goto case 4; - } + { + Vector256 vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 5))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 5)); + goto case 4; + } case 4: - { - Vector256 vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 4))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 4)); - goto case 3; - } + { + Vector256 vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref 
xRef, remainder - (uint)(Vector256.Count * 4))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 4)); + goto case 3; + } case 3: - { - Vector256 vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 3))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 3)); - goto case 2; - } + { + Vector256 vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 3))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 3)); + goto case 2; + } case 2: - { - Vector256 vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 2))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 2)); - goto case 1; - } + { + Vector256 vector = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 2))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 2)); + goto case 1; + } case 1: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector256.Count); - goto case 0; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end.StoreUnsafe(ref dRef, endIndex - (uint)Vector256.Count); + goto case 0; + } case 0: - { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); - break; - } + { + // Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } } } @@ -3697,67 +3697,67 @@ static void Vectorized512(ref TInput xRef, ref TOutput dRef, nuint remainder) switch (remainder / (uint)Vector512.Count) { case 8: - { - Vector512 vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 8))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 
8)); - goto case 7; - } + { + Vector512 vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 8))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 8)); + goto case 7; + } case 7: - { - Vector512 vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 7))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 7)); - goto case 6; - } + { + Vector512 vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 7))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 7)); + goto case 6; + } case 6: - { - Vector512 vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 6))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 6)); - goto case 5; - } + { + Vector512 vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 6))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 6)); + goto case 5; + } case 5: - { - Vector512 vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 5))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 5)); - goto case 4; - } + { + Vector512 vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 5))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 5)); + goto case 4; + } case 4: - { - Vector512 vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 4))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 4)); - goto case 3; - } + { + Vector512 vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 4))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 4)); + goto case 3; + } case 3: - { - Vector512 
vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 3))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 3)); - goto case 2; - } + { + Vector512 vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 3))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 3)); + goto case 2; + } case 2: - { - Vector512 vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 2))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 2)); - goto case 1; - } + { + Vector512 vector = TUnaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 2))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 2)); + goto case 1; + } case 1: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512.Count); - goto case 0; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512.Count); + goto case 0; + } case 0: - { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); - break; - } + { + // Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } } } @@ -3789,75 +3789,75 @@ static void VectorizedSmall4(ref TInput xRef, ref TOutput dRef, nuint remainder) case 11: case 10: case 9: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); - Vector256 end = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count)); + Vector256 beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); + Vector256 end = 
TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count)); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); - break; - } + break; + } case 8: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); - beg.StoreUnsafe(ref dRef); + Vector256 beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 7: case 6: case 5: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); - Vector128 end = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count)); + Vector128 beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); + Vector128 end = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count)); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); - break; - } + break; + } case 4: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); - beg.StoreUnsafe(ref dRef); + Vector128 beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 3: - { - Unsafe.Add(ref dRef, 2) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 2)); - goto case 2; - } + { + Unsafe.Add(ref dRef, 2) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 2)); + goto case 2; + } case 2: - { - Unsafe.Add(ref dRef, 1) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 1)); - goto case 
1; - } + { + Unsafe.Add(ref dRef, 1) = TUnaryOperator.Invoke(Unsafe.Add(ref xRef, 1)); + goto case 1; + } case 1: - { - dRef = TUnaryOperator.Invoke(xRef); - goto case 0; - } + { + dRef = TUnaryOperator.Invoke(xRef); + goto case 0; + } case 0: - { - break; - } + { + break; + } } } @@ -3871,61 +3871,61 @@ static void VectorizedSmall8(ref TInput xRef, ref TOutput dRef, nuint remainder) case 7: case 6: case 5: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); - Vector256 end = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count)); + Vector256 beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); + Vector256 end = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count)); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); - break; - } + break; + } case 4: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); - beg.StoreUnsafe(ref dRef); + Vector256 beg = TUnaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 3: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); - Vector128 end = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count)); + Vector128 beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); + Vector128 end = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count)); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - 
(uint)Vector128.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); - break; - } + break; + } case 2: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); - beg.StoreUnsafe(ref dRef); + Vector128 beg = TUnaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 1: - { - dRef = TUnaryOperator.Invoke(xRef); - goto case 0; - } + { + dRef = TUnaryOperator.Invoke(xRef); + goto case 0; + } case 0: - { - break; - } + { + break; + } } } } @@ -4458,74 +4458,74 @@ static void Vectorized128(ref T xRef, ref T yRef, ref T dRef, nuint remainder) switch (remainder / (uint)Vector128.Count) { case 8: - { - Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 8)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 8))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 8)); - goto case 7; - } + { + Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 8)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 8))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 8)); + goto case 7; + } case 7: - { - Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 7)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 7))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 7)); - goto case 6; - } + { + Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 7)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 7))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 7)); + goto case 6; + } case 6: - { - 
Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 6)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 6))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 6)); - goto case 5; - } + { + Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 6)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 6))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 6)); + goto case 5; + } case 5: - { - Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 5)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 5))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 5)); - goto case 4; - } + { + Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 5)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 5))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 5)); + goto case 4; + } case 4: - { - Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 4)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 4))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 4)); - goto case 3; - } + { + Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 4)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 4))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 4)); + goto case 3; + } case 3: - { - Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 3)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 3))); - vector.StoreUnsafe(ref dRef, remainder - 
(uint)(Vector128.Count * 3)); - goto case 2; - } + { + Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 3)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 3))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 3)); + goto case 2; + } case 2: - { - Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 2)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 2))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 2)); - goto case 1; - } + { + Vector128 vector = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 2)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 2))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 2)); + goto case 1; + } case 1: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128.Count); - goto case 0; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128.Count); + goto case 0; + } case 0: - { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); - break; - } + { + // Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } } } @@ -4700,74 +4700,74 @@ static void Vectorized256(ref T xRef, ref T yRef, ref T dRef, nuint remainder) switch (remainder / (uint)Vector256.Count) { case 8: - { - Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 8)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 8))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 8)); - 
goto case 7; - } + { + Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 8)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 8))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 8)); + goto case 7; + } case 7: - { - Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 7)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 7))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 7)); - goto case 6; - } + { + Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 7)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 7))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 7)); + goto case 6; + } case 6: - { - Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 6)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 6))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 6)); - goto case 5; - } + { + Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 6)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 6))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 6)); + goto case 5; + } case 5: - { - Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 5)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 5))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 5)); - goto case 4; - } + { + Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 5)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 5))); + vector.StoreUnsafe(ref 
dRef, remainder - (uint)(Vector256.Count * 5)); + goto case 4; + } case 4: - { - Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 4)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 4))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 4)); - goto case 3; - } + { + Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 4)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 4))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 4)); + goto case 3; + } case 3: - { - Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 3)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 3))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 3)); - goto case 2; - } + { + Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 3)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 3))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 3)); + goto case 2; + } case 2: - { - Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 2)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 2))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 2)); - goto case 1; - } + { + Vector256 vector = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 2)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 2))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 2)); + goto case 1; + } case 1: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector256.Count); - goto case 
0; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end.StoreUnsafe(ref dRef, endIndex - (uint)Vector256.Count); + goto case 0; + } case 0: - { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); - break; - } + { + // Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } } } @@ -4942,74 +4942,74 @@ static void Vectorized512(ref T xRef, ref T yRef, ref T dRef, nuint remainder) switch (remainder / (uint)Vector512.Count) { case 8: - { - Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 8)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 8))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 8)); - goto case 7; - } + { + Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 8)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 8))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 8)); + goto case 7; + } case 7: - { - Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 7)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 7))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 7)); - goto case 6; - } + { + Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 7)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 7))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 7)); + goto case 6; + } case 6: - { - Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 6)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 6))); - 
vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 6)); - goto case 5; - } + { + Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 6)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 6))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 6)); + goto case 5; + } case 5: - { - Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 5)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 5))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 5)); - goto case 4; - } + { + Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 5)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 5))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 5)); + goto case 4; + } case 4: - { - Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 4)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 4))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 4)); - goto case 3; - } + { + Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 4)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 4))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 4)); + goto case 3; + } case 3: - { - Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 3)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 3))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 3)); - goto case 2; - } + { + Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 3)), + Vector512.LoadUnsafe(ref 
yRef, remainder - (uint)(Vector512.Count * 3))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 3)); + goto case 2; + } case 2: - { - Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 2)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 2))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 2)); - goto case 1; - } + { + Vector512 vector = TBinaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 2)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 2))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 2)); + goto case 1; + } case 1: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512.Count); - goto case 0; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512.Count); + goto case 0; + } case 0: - { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); - break; - } + { + // Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } } } @@ -5041,83 +5041,83 @@ static void VectorizedSmall4(ref T xRef, ref T yRef, ref T dRef, nuint remainder case 11: case 10: case 9: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - Vector256.LoadUnsafe(ref yRef)); - Vector256 end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256.Count)); + Vector256 beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref 
yRef)); + Vector256 end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256.Count)); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); - break; - } + break; + } case 8: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - Vector256.LoadUnsafe(ref yRef)); - beg.StoreUnsafe(ref dRef); + Vector256 beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 7: case 6: case 5: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef)); - Vector128 end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count)); + Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef)); + Vector128 end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count)); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); - break; - } + break; + } case 4: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef)); - beg.StoreUnsafe(ref dRef); + Vector128 beg = 
TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 3: - { - Unsafe.Add(ref dRef, 2) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 2), - Unsafe.Add(ref yRef, 2)); - goto case 2; - } + { + Unsafe.Add(ref dRef, 2) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 2), + Unsafe.Add(ref yRef, 2)); + goto case 2; + } case 2: - { - Unsafe.Add(ref dRef, 1) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 1), - Unsafe.Add(ref yRef, 1)); - goto case 1; - } + { + Unsafe.Add(ref dRef, 1) = TBinaryOperator.Invoke(Unsafe.Add(ref xRef, 1), + Unsafe.Add(ref yRef, 1)); + goto case 1; + } case 1: - { - dRef = TBinaryOperator.Invoke(xRef, yRef); - goto case 0; - } + { + dRef = TBinaryOperator.Invoke(xRef, yRef); + goto case 0; + } case 0: - { - break; - } + { + break; + } } } @@ -5131,67 +5131,67 @@ static void VectorizedSmall8(ref T xRef, ref T yRef, ref T dRef, nuint remainder case 7: case 6: case 5: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - Vector256.LoadUnsafe(ref yRef)); - Vector256 end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256.Count)); + Vector256 beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef)); + Vector256 end = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256.Count)); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); - break; - } + break; + } case 4: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - 
Vector256 beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - Vector256.LoadUnsafe(ref yRef)); - beg.StoreUnsafe(ref dRef); + Vector256 beg = TBinaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 3: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef)); - Vector128 end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count)); + Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef)); + Vector128 end = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count)); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); - break; - } + break; + } case 2: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef)); - beg.StoreUnsafe(ref dRef); + Vector128 beg = TBinaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 1: - { - dRef = TBinaryOperator.Invoke(xRef, yRef); - goto case 0; - } + { + dRef = TBinaryOperator.Invoke(xRef, yRef); + goto case 0; + } case 0: - { - break; - } + { + break; + } } } } @@ -5492,74 +5492,74 @@ static void Vectorized128(ref T xRef, T y, ref T dRef, nuint remainder) switch (remainder / (uint)Vector128.Count) { case 8: - { - Vector128 vector = 
TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 8))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 8)); - goto case 7; - } + { + Vector128 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 8))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 8)); + goto case 7; + } case 7: - { - Vector128 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 7))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 7)); - goto case 6; - } + { + Vector128 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 7))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 7)); + goto case 6; + } case 6: - { - Vector128 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 6))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 6)); - goto case 5; - } + { + Vector128 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 6))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 6)); + goto case 5; + } case 5: - { - Vector128 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 5))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 5)); - goto case 4; - } + { + Vector128 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 5))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 5)); + goto case 4; + } case 4: 
- { - Vector128 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 4))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 4)); - goto case 3; - } + { + Vector128 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 4))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 4)); + goto case 3; + } case 3: - { - Vector128 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 3))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 3)); - goto case 2; - } + { + Vector128 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 3))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 3)); + goto case 2; + } case 2: - { - Vector128 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 2))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 2)); - goto case 1; - } + { + Vector128 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 2))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 2)); + goto case 1; + } case 1: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128.Count); - goto case 0; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128.Count); + goto case 0; + } case 0: - { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); 
- break; - } + { + // Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } } } @@ -5730,74 +5730,74 @@ static void Vectorized256(ref T xRef, T y, ref T dRef, nuint remainder) switch (remainder / (uint)Vector256.Count) { case 8: - { - Vector256 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 8))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 8)); - goto case 7; - } + { + Vector256 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 8))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 8)); + goto case 7; + } case 7: - { - Vector256 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 7))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 7)); - goto case 6; - } + { + Vector256 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 7))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 7)); + goto case 6; + } case 6: - { - Vector256 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 6))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 6)); - goto case 5; - } + { + Vector256 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 6))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 6)); + goto case 5; + } case 5: - { - Vector256 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 5))), - yVec); - 
vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 5)); - goto case 4; - } + { + Vector256 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 5))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 5)); + goto case 4; + } case 4: - { - Vector256 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 4))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 4)); - goto case 3; - } + { + Vector256 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 4))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 4)); + goto case 3; + } case 3: - { - Vector256 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 3))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 3)); - goto case 2; - } + { + Vector256 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 3))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 3)); + goto case 2; + } case 2: - { - Vector256 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 2))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 2)); - goto case 1; - } + { + Vector256 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 2))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 2)); + goto case 1; + } case 1: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - 
(uint)Vector256.Count); - goto case 0; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end.StoreUnsafe(ref dRef, endIndex - (uint)Vector256.Count); + goto case 0; + } case 0: - { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); - break; - } + { + // Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } } } @@ -5968,74 +5968,74 @@ static void Vectorized512(ref T xRef, T y, ref T dRef, nuint remainder) switch (remainder / (uint)Vector512.Count) { case 8: - { - Vector512 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 8))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 8)); - goto case 7; - } + { + Vector512 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 8))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 8)); + goto case 7; + } case 7: - { - Vector512 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 7))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 7)); - goto case 6; - } + { + Vector512 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 7))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 7)); + goto case 6; + } case 6: - { - Vector512 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 6))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 6)); - goto case 5; - } + { + Vector512 vector = 
TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 6))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 6)); + goto case 5; + } case 5: - { - Vector512 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 5))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 5)); - goto case 4; - } + { + Vector512 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 5))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 5)); + goto case 4; + } case 4: - { - Vector512 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 4))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 4)); - goto case 3; - } + { + Vector512 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 4))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 4)); + goto case 3; + } case 3: - { - Vector512 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 3))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 3)); - goto case 2; - } + { + Vector512 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 3))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 3)); + goto case 2; + } case 2: - { - Vector512 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 2))), - yVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 2)); - goto case 1; - } 
+ { + Vector512 vector = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 2))), + yVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 2)); + goto case 1; + } case 1: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512.Count); - goto case 0; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512.Count); + goto case 0; + } case 0: - { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); - break; - } + { + // Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } } } @@ -6067,87 +6067,87 @@ static void VectorizedSmall4(ref T xRef, T y, ref T dRef, nuint remainder) case 11: case 10: case 9: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 yVec = Vector256.Create(y); + Vector256 yVec = Vector256.Create(y); - Vector256 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)), - yVec); - Vector256 end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count)), - yVec); + Vector256 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)), + yVec); + Vector256 end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count)), + yVec); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); - break; - } + break; + } case 8: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + 
Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)), - Vector256.Create(y)); - beg.StoreUnsafe(ref dRef); + Vector256 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)), + Vector256.Create(y)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 7: case 6: case 5: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 yVec = Vector128.Create(y); + Vector128 yVec = Vector128.Create(y); - Vector128 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)), - yVec); - Vector128 end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count)), - yVec); + Vector128 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)), + yVec); + Vector128 end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count)), + yVec); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); - break; - } + break; + } case 4: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)), - Vector128.Create(y)); - beg.StoreUnsafe(ref dRef); + Vector128 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)), + Vector128.Create(y)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 3: - { - Unsafe.Add(ref dRef, 2) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 2)), - y); - goto case 2; - } + { + Unsafe.Add(ref dRef, 2) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 2)), 
+ y); + goto case 2; + } case 2: - { - Unsafe.Add(ref dRef, 1) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 1)), - y); - goto case 1; - } + { + Unsafe.Add(ref dRef, 1) = TBinaryOperator.Invoke(TTransformOperator.Invoke(Unsafe.Add(ref xRef, 1)), + y); + goto case 1; + } case 1: - { - dRef = TBinaryOperator.Invoke(TTransformOperator.Invoke(xRef), y); - goto case 0; - } + { + dRef = TBinaryOperator.Invoke(TTransformOperator.Invoke(xRef), y); + goto case 0; + } case 0: - { - break; - } + { + break; + } } } @@ -6161,71 +6161,71 @@ static void VectorizedSmall8(ref T xRef, T y, ref T dRef, nuint remainder) case 7: case 6: case 5: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 yVec = Vector256.Create(y); + Vector256 yVec = Vector256.Create(y); - Vector256 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)), - yVec); - Vector256 end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count)), - yVec); + Vector256 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)), + yVec); + Vector256 end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count)), + yVec); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); - break; - } + break; + } case 4: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)), - Vector256.Create(y)); - beg.StoreUnsafe(ref dRef); + Vector256 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector256.LoadUnsafe(ref xRef)), + Vector256.Create(y)); + beg.StoreUnsafe(ref dRef); - break; - } + 
break; + } case 3: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 yVec = Vector128.Create(y); + Vector128 yVec = Vector128.Create(y); - Vector128 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)), - yVec); - Vector128 end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count)), - yVec); + Vector128 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)), + yVec); + Vector128 end = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count)), + yVec); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); - break; - } + break; + } case 2: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)), - Vector128.Create(y)); - beg.StoreUnsafe(ref dRef); + Vector128 beg = TBinaryOperator.Invoke(TTransformOperator.Invoke(Vector128.LoadUnsafe(ref xRef)), + Vector128.Create(y)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 1: - { - dRef = TBinaryOperator.Invoke(TTransformOperator.Invoke(xRef), y); - goto case 0; - } + { + dRef = TBinaryOperator.Invoke(TTransformOperator.Invoke(xRef), y); + goto case 0; + } case 0: - { - break; - } + { + break; + } } } } @@ -6534,81 +6534,81 @@ static void Vectorized128(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nuint switch (remainder / (uint)Vector128.Count) { case 8: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 8)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 8)), - Vector128.LoadUnsafe(ref zRef, 
remainder - (uint)(Vector128.Count * 8))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 8)); - goto case 7; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 8)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 8)), + Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 8))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 8)); + goto case 7; + } case 7: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 7)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 7)), - Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 7))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 7)); - goto case 6; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 7)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 7)), + Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 7))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 7)); + goto case 6; + } case 6: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 6)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 6)), - Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 6))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 6)); - goto case 5; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 6)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 6)), + Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 6))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 6)); + goto case 5; + } case 5: - { - Vector128 
vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 5)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 5)), - Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 5))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 5)); - goto case 4; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 5)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 5)), + Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 5))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 5)); + goto case 4; + } case 4: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 4)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 4)), - Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 4))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 4)); - goto case 3; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 4)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 4)), + Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 4))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 4)); + goto case 3; + } case 3: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 3)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 3)), - Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 3))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 3)); - goto case 2; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 3)), + Vector128.LoadUnsafe(ref yRef, remainder - 
(uint)(Vector128.Count * 3)), + Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 3))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 3)); + goto case 2; + } case 2: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 2)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 2)), - Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 2))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 2)); - goto case 1; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 2)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 2)), + Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 2))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 2)); + goto case 1; + } case 1: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128.Count); - goto case 0; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128.Count); + goto case 0; + } case 0: - { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); - break; - } + { + // Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } } } @@ -6807,81 +6807,81 @@ static void Vectorized256(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nuint switch (remainder / (uint)Vector256.Count) { case 8: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 8)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 8)), - Vector256.LoadUnsafe(ref zRef, remainder - 
(uint)(Vector256.Count * 8))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 8)); - goto case 7; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 8)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 8)), + Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 8))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 8)); + goto case 7; + } case 7: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 7)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 7)), - Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 7))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 7)); - goto case 6; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 7)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 7)), + Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 7))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 7)); + goto case 6; + } case 6: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 6)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 6)), - Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 6))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 6)); - goto case 5; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 6)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 6)), + Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 6))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 6)); + goto case 5; + } case 5: - { - Vector256 vector = 
TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 5)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 5)), - Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 5))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 5)); - goto case 4; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 5)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 5)), + Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 5))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 5)); + goto case 4; + } case 4: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 4)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 4)), - Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 4))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 4)); - goto case 3; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 4)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 4)), + Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 4))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 4)); + goto case 3; + } case 3: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 3)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 3)), - Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 3))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 3)); - goto case 2; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 3)), + Vector256.LoadUnsafe(ref yRef, remainder - 
(uint)(Vector256.Count * 3)), + Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 3))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 3)); + goto case 2; + } case 2: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 2)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 2)), - Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 2))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 2)); - goto case 1; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 2)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 2)), + Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 2))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 2)); + goto case 1; + } case 1: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector256.Count); - goto case 0; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end.StoreUnsafe(ref dRef, endIndex - (uint)Vector256.Count); + goto case 0; + } case 0: - { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); - break; - } + { + // Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } } } @@ -7080,81 +7080,81 @@ static void Vectorized512(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nuint switch (remainder / (uint)Vector512.Count) { case 8: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 8)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 8)), - Vector512.LoadUnsafe(ref zRef, remainder - 
(uint)(Vector512.Count * 8))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 8)); - goto case 7; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 8)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 8)), + Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 8))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 8)); + goto case 7; + } case 7: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 7)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 7)), - Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 7))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 7)); - goto case 6; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 7)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 7)), + Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 7))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 7)); + goto case 6; + } case 6: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 6)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 6)), - Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 6))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 6)); - goto case 5; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 6)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 6)), + Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 6))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 6)); + goto case 5; + } case 5: - { - Vector512 vector = 
TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 5)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 5)), - Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 5))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 5)); - goto case 4; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 5)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 5)), + Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 5))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 5)); + goto case 4; + } case 4: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 4)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 4)), - Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 4))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 4)); - goto case 3; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 4)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 4)), + Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 4))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 4)); + goto case 3; + } case 3: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 3)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 3)), - Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 3))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 3)); - goto case 2; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 3)), + Vector512.LoadUnsafe(ref yRef, remainder - 
(uint)(Vector512.Count * 3)), + Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 3))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 3)); + goto case 2; + } case 2: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 2)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 2)), - Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 2))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 2)); - goto case 1; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 2)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 2)), + Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 2))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 2)); + goto case 1; + } case 1: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512.Count); - goto case 0; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512.Count); + goto case 0; + } case 0: - { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); - break; - } + { + // Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } } } @@ -7186,91 +7186,91 @@ static void VectorizedSmall4(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nui case 11: case 10: case 9: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - Vector256.LoadUnsafe(ref yRef), - Vector256.LoadUnsafe(ref zRef)); - Vector256 end = 
TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256.Count), - Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256.Count)); + Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef), + Vector256.LoadUnsafe(ref zRef)); + Vector256 end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256.Count), + Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256.Count)); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); - break; - } + break; + } case 8: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - Vector256.LoadUnsafe(ref yRef), - Vector256.LoadUnsafe(ref zRef)); - beg.StoreUnsafe(ref dRef); + Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef), + Vector256.LoadUnsafe(ref zRef)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 7: case 6: case 5: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef), - Vector128.LoadUnsafe(ref zRef)); - Vector128 end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count), - Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128.Count)); + Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef), + Vector128.LoadUnsafe(ref zRef)); + Vector128 end = 
TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count), + Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128.Count)); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); - break; - } + break; + } case 4: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef), - Vector128.LoadUnsafe(ref zRef)); - beg.StoreUnsafe(ref dRef); + Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef), + Vector128.LoadUnsafe(ref zRef)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 3: - { - Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2), - Unsafe.Add(ref yRef, 2), - Unsafe.Add(ref zRef, 2)); - goto case 2; - } + { + Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2), + Unsafe.Add(ref yRef, 2), + Unsafe.Add(ref zRef, 2)); + goto case 2; + } case 2: - { - Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1), - Unsafe.Add(ref yRef, 1), - Unsafe.Add(ref zRef, 1)); - goto case 1; - } + { + Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1), + Unsafe.Add(ref yRef, 1), + Unsafe.Add(ref zRef, 1)); + goto case 1; + } case 1: - { - dRef = TTernaryOperator.Invoke(xRef, yRef, zRef); - goto case 0; - } + { + dRef = TTernaryOperator.Invoke(xRef, yRef, zRef); + goto case 0; + } case 0: - { - break; - } + { + break; + } } } @@ -7284,73 +7284,73 @@ static void VectorizedSmall8(ref T xRef, ref T yRef, ref T zRef, ref T dRef, nui case 7: case 6: case 5: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + 
Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - Vector256.LoadUnsafe(ref yRef), - Vector256.LoadUnsafe(ref zRef)); - Vector256 end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256.Count), - Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256.Count)); + Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef), + Vector256.LoadUnsafe(ref zRef)); + Vector256 end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256.Count), + Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256.Count)); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); - break; - } + break; + } case 4: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - Vector256.LoadUnsafe(ref yRef), - Vector256.LoadUnsafe(ref zRef)); - beg.StoreUnsafe(ref dRef); + Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef), + Vector256.LoadUnsafe(ref zRef)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 3: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef), - Vector128.LoadUnsafe(ref zRef)); - Vector128 end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count), - Vector128.LoadUnsafe(ref zRef, 
remainder - (uint)Vector128.Count)); + Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef), + Vector128.LoadUnsafe(ref zRef)); + Vector128 end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count), + Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128.Count)); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); - break; - } + break; + } case 2: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef), - Vector128.LoadUnsafe(ref zRef)); - beg.StoreUnsafe(ref dRef); + Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef), + Vector128.LoadUnsafe(ref zRef)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 1: - { - dRef = TTernaryOperator.Invoke(xRef, yRef, zRef); - goto case 0; - } + { + dRef = TTernaryOperator.Invoke(xRef, yRef, zRef); + goto case 0; + } case 0: - { - break; - } + { + break; + } } } } @@ -7653,81 +7653,81 @@ static void Vectorized128(ref T xRef, ref T yRef, T z, ref T dRef, nuint remaind switch (remainder / (uint)Vector128.Count) { case 8: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 8)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 8)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 8)); - goto case 7; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 8)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 8)), + zVec); + 
vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 8)); + goto case 7; + } case 7: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 7)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 7)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 7)); - goto case 6; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 7)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 7)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 7)); + goto case 6; + } case 6: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 6)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 6)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 6)); - goto case 5; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 6)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 6)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 6)); + goto case 5; + } case 5: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 5)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 5)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 5)); - goto case 4; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 5)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 5)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 5)); + goto case 4; + } case 4: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder 
- (uint)(Vector128.Count * 4)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 4)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 4)); - goto case 3; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 4)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 4)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 4)); + goto case 3; + } case 3: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 3)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 3)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 3)); - goto case 2; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 3)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 3)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 3)); + goto case 2; + } case 2: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 2)), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 2)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 2)); - goto case 1; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 2)), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)(Vector128.Count * 2)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 2)); + goto case 1; + } case 1: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128.Count); - goto case 0; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + 
end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128.Count); + goto case 0; + } case 0: - { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); - break; - } + { + // Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } } } @@ -7922,81 +7922,81 @@ static void Vectorized256(ref T xRef, ref T yRef, T z, ref T dRef, nuint remaind switch (remainder / (uint)Vector256.Count) { case 8: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 8)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 8)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 8)); - goto case 7; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 8)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 8)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 8)); + goto case 7; + } case 7: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 7)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 7)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 7)); - goto case 6; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 7)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 7)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 7)); + goto case 6; + } case 6: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 6)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 6)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - 
(uint)(Vector256.Count * 6)); - goto case 5; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 6)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 6)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 6)); + goto case 5; + } case 5: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 5)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 5)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 5)); - goto case 4; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 5)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 5)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 5)); + goto case 4; + } case 4: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 4)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 4)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 4)); - goto case 3; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 4)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 4)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 4)); + goto case 3; + } case 3: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 3)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 3)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 3)); - goto case 2; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 3)), + 
Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 3)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 3)); + goto case 2; + } case 2: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 2)), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 2)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 2)); - goto case 1; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 2)), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)(Vector256.Count * 2)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 2)); + goto case 1; + } case 1: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector256.Count); - goto case 0; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end.StoreUnsafe(ref dRef, endIndex - (uint)Vector256.Count); + goto case 0; + } case 0: - { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); - break; - } + { + // Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } } } @@ -8191,81 +8191,81 @@ static void Vectorized512(ref T xRef, ref T yRef, T z, ref T dRef, nuint remaind switch (remainder / (uint)Vector512.Count) { case 8: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 8)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 8)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 8)); - goto case 7; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 
8)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 8)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 8)); + goto case 7; + } case 7: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 7)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 7)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 7)); - goto case 6; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 7)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 7)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 7)); + goto case 6; + } case 6: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 6)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 6)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 6)); - goto case 5; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 6)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 6)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 6)); + goto case 5; + } case 5: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 5)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 5)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 5)); - goto case 4; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 5)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 5)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 5)); + goto case 4; + } case 
4: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 4)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 4)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 4)); - goto case 3; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 4)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 4)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 4)); + goto case 3; + } case 3: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 3)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 3)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 3)); - goto case 2; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 3)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 3)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 3)); + goto case 2; + } case 2: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 2)), - Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 2)), - zVec); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 2)); - goto case 1; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 2)), + Vector512.LoadUnsafe(ref yRef, remainder - (uint)(Vector512.Count * 2)), + zVec); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 2)); + goto case 1; + } case 1: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512.Count); - goto case 0; - } + { + // 
Store the last block, which includes any elements that wouldn't fill a full vector + end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512.Count); + goto case 0; + } case 0: - { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); - break; - } + { + // Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } } } @@ -8297,95 +8297,95 @@ static void VectorizedSmall4(ref T xRef, ref T yRef, T z, ref T dRef, nuint rema case 11: case 10: case 9: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 zVec = Vector256.Create(z); + Vector256 zVec = Vector256.Create(z); - Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - Vector256.LoadUnsafe(ref yRef), - zVec); - Vector256 end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256.Count), - zVec); + Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef), + zVec); + Vector256 end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256.Count), + zVec); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); - break; - } + break; + } case 8: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - Vector256.LoadUnsafe(ref yRef), - Vector256.Create(z)); - beg.StoreUnsafe(ref dRef); + Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef), + Vector256.Create(z)); 
+ beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 7: case 6: case 5: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 zVec = Vector128.Create(z); + Vector128 zVec = Vector128.Create(z); - Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef), - zVec); - Vector128 end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count), - zVec); + Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef), + zVec); + Vector128 end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count), + zVec); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); - break; - } + break; + } case 4: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef), - Vector128.Create(z)); - beg.StoreUnsafe(ref dRef); + Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef), + Vector128.Create(z)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 3: - { - Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2), - Unsafe.Add(ref yRef, 2), - z); - goto case 2; - } + { + Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2), + Unsafe.Add(ref yRef, 2), + z); + goto case 2; + } case 2: - { - Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1), - Unsafe.Add(ref yRef, 1), - z); - goto case 1; - } + { + Unsafe.Add(ref dRef, 1) = 
TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1), + Unsafe.Add(ref yRef, 1), + z); + goto case 1; + } case 1: - { - dRef = TTernaryOperator.Invoke(xRef, yRef, z); - goto case 0; - } + { + dRef = TTernaryOperator.Invoke(xRef, yRef, z); + goto case 0; + } case 0: - { - break; - } + { + break; + } } } @@ -8399,77 +8399,77 @@ static void VectorizedSmall8(ref T xRef, ref T yRef, T z, ref T dRef, nuint rema case 7: case 6: case 5: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 zVec = Vector256.Create(z); + Vector256 zVec = Vector256.Create(z); - Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - Vector256.LoadUnsafe(ref yRef), - zVec); - Vector256 end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), - Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256.Count), - zVec); + Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef), + zVec); + Vector256 end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), + Vector256.LoadUnsafe(ref yRef, remainder - (uint)Vector256.Count), + zVec); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); - break; - } + break; + } case 4: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - Vector256.LoadUnsafe(ref yRef), - Vector256.Create(z)); - beg.StoreUnsafe(ref dRef); + Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.LoadUnsafe(ref yRef), + Vector256.Create(z)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 3: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + 
Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 zVec = Vector128.Create(z); + Vector128 zVec = Vector128.Create(z); - Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef), - zVec); - Vector128 end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), - Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count), - zVec); + Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef), + zVec); + Vector128 end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), + Vector128.LoadUnsafe(ref yRef, remainder - (uint)Vector128.Count), + zVec); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); - break; - } + break; + } case 2: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.LoadUnsafe(ref yRef), - Vector128.Create(z)); - beg.StoreUnsafe(ref dRef); + Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.LoadUnsafe(ref yRef), + Vector128.Create(z)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 1: - { - dRef = TTernaryOperator.Invoke(xRef, yRef, z); - goto case 0; - } + { + dRef = TTernaryOperator.Invoke(xRef, yRef, z); + goto case 0; + } case 0: - { - break; - } + { + break; + } } } } @@ -8772,81 +8772,81 @@ static void Vectorized128(ref T xRef, T y, ref T zRef, ref T dRef, nuint remaind switch (remainder / (uint)Vector128.Count) { case 8: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 8)), - yVec, - Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 8))); - vector.StoreUnsafe(ref 
dRef, remainder - (uint)(Vector128.Count * 8)); - goto case 7; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 8)), + yVec, + Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 8))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 8)); + goto case 7; + } case 7: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 7)), - yVec, - Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 7))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 7)); - goto case 6; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 7)), + yVec, + Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 7))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 7)); + goto case 6; + } case 6: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 6)), - yVec, - Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 6))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 6)); - goto case 5; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 6)), + yVec, + Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 6))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 6)); + goto case 5; + } case 5: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 5)), - yVec, - Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 5))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 5)); - goto case 4; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 
5)), + yVec, + Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 5))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 5)); + goto case 4; + } case 4: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 4)), - yVec, - Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 4))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 4)); - goto case 3; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 4)), + yVec, + Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 4))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 4)); + goto case 3; + } case 3: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 3)), - yVec, - Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 3))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 3)); - goto case 2; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 3)), + yVec, + Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 3))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 3)); + goto case 2; + } case 2: - { - Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 2)), - yVec, - Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 2))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 2)); - goto case 1; - } + { + Vector128 vector = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)(Vector128.Count * 2)), + yVec, + Vector128.LoadUnsafe(ref zRef, remainder - (uint)(Vector128.Count * 2))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector128.Count * 2)); + goto case 1; + } case 
1: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128.Count); - goto case 0; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end.StoreUnsafe(ref dRef, endIndex - (uint)Vector128.Count); + goto case 0; + } case 0: - { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); - break; - } + { + // Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } } } @@ -9041,81 +9041,81 @@ static void Vectorized256(ref T xRef, T y, ref T zRef, ref T dRef, nuint remaind switch (remainder / (uint)Vector256.Count) { case 8: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 8)), - yVec, - Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 8))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 8)); - goto case 7; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 8)), + yVec, + Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 8))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 8)); + goto case 7; + } case 7: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 7)), - yVec, - Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 7))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 7)); - goto case 6; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 7)), + yVec, + Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 7))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 7)); + goto case 6; + } 
case 6: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 6)), - yVec, - Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 6))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 6)); - goto case 5; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 6)), + yVec, + Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 6))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 6)); + goto case 5; + } case 5: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 5)), - yVec, - Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 5))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 5)); - goto case 4; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 5)), + yVec, + Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 5))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 5)); + goto case 4; + } case 4: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 4)), - yVec, - Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 4))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 4)); - goto case 3; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 4)), + yVec, + Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 4))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 4)); + goto case 3; + } case 3: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 3)), - yVec, - Vector256.LoadUnsafe(ref zRef, remainder - 
(uint)(Vector256.Count * 3))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 3)); - goto case 2; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 3)), + yVec, + Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 3))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 3)); + goto case 2; + } case 2: - { - Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 2)), - yVec, - Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 2))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 2)); - goto case 1; - } + { + Vector256 vector = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)(Vector256.Count * 2)), + yVec, + Vector256.LoadUnsafe(ref zRef, remainder - (uint)(Vector256.Count * 2))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector256.Count * 2)); + goto case 1; + } case 1: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector256.Count); - goto case 0; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end.StoreUnsafe(ref dRef, endIndex - (uint)Vector256.Count); + goto case 0; + } case 0: - { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); - break; - } + { + // Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } } } @@ -9310,81 +9310,81 @@ static void Vectorized512(ref T xRef, T y, ref T zRef, ref T dRef, nuint remaind switch (remainder / (uint)Vector512.Count) { case 8: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 8)), - yVec, - Vector512.LoadUnsafe(ref zRef, 
remainder - (uint)(Vector512.Count * 8))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 8)); - goto case 7; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 8)), + yVec, + Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 8))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 8)); + goto case 7; + } case 7: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 7)), - yVec, - Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 7))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 7)); - goto case 6; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 7)), + yVec, + Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 7))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 7)); + goto case 6; + } case 6: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 6)), - yVec, - Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 6))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 6)); - goto case 5; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 6)), + yVec, + Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 6))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 6)); + goto case 5; + } case 5: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 5)), - yVec, - Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 5))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 5)); - goto case 4; - } + { + Vector512 vector = 
TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 5)), + yVec, + Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 5))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 5)); + goto case 4; + } case 4: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 4)), - yVec, - Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 4))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 4)); - goto case 3; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 4)), + yVec, + Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 4))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 4)); + goto case 3; + } case 3: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 3)), - yVec, - Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 3))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 3)); - goto case 2; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 3)), + yVec, + Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 3))); + vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 3)); + goto case 2; + } case 2: - { - Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 2)), - yVec, - Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 2))); - vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 2)); - goto case 1; - } + { + Vector512 vector = TTernaryOperator.Invoke(Vector512.LoadUnsafe(ref xRef, remainder - (uint)(Vector512.Count * 2)), + yVec, + Vector512.LoadUnsafe(ref zRef, remainder - (uint)(Vector512.Count * 2))); + 
vector.StoreUnsafe(ref dRef, remainder - (uint)(Vector512.Count * 2)); + goto case 1; + } case 1: - { - // Store the last block, which includes any elements that wouldn't fill a full vector - end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512.Count); - goto case 0; - } + { + // Store the last block, which includes any elements that wouldn't fill a full vector + end.StoreUnsafe(ref dRef, endIndex - (uint)Vector512.Count); + goto case 0; + } case 0: - { - // Store the first block, which includes any elements preceding the first aligned block - beg.StoreUnsafe(ref dRefBeg); - break; - } + { + // Store the first block, which includes any elements preceding the first aligned block + beg.StoreUnsafe(ref dRefBeg); + break; + } } } @@ -9416,95 +9416,95 @@ static void VectorizedSmall4(ref T xRef, T y, ref T zRef, ref T dRef, nuint rema case 11: case 10: case 9: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 yVec = Vector256.Create(y); + Vector256 yVec = Vector256.Create(y); - Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - yVec, - Vector256.LoadUnsafe(ref zRef)); - Vector256 end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), - yVec, - Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256.Count)); + Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + yVec, + Vector256.LoadUnsafe(ref zRef)); + Vector256 end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), + yVec, + Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256.Count)); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); - break; - } + break; + } case 8: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 
beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - Vector256.Create(y), - Vector256.LoadUnsafe(ref zRef)); - beg.StoreUnsafe(ref dRef); + Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.Create(y), + Vector256.LoadUnsafe(ref zRef)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 7: case 6: case 5: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 yVec = Vector128.Create(y); + Vector128 yVec = Vector128.Create(y); - Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - yVec, - Vector128.LoadUnsafe(ref zRef)); - Vector128 end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), - yVec, - Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128.Count)); + Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + yVec, + Vector128.LoadUnsafe(ref zRef)); + Vector128 end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), + yVec, + Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128.Count)); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); - break; - } + break; + } case 4: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.Create(y), - Vector128.LoadUnsafe(ref zRef)); - beg.StoreUnsafe(ref dRef); + Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.Create(y), + Vector128.LoadUnsafe(ref zRef)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 3: - { - Unsafe.Add(ref dRef, 2) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2), - y, - Unsafe.Add(ref zRef, 2)); - goto case 2; - } + { + Unsafe.Add(ref dRef, 2) 
= TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 2), + y, + Unsafe.Add(ref zRef, 2)); + goto case 2; + } case 2: - { - Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1), - y, - Unsafe.Add(ref zRef, 1)); - goto case 1; - } + { + Unsafe.Add(ref dRef, 1) = TTernaryOperator.Invoke(Unsafe.Add(ref xRef, 1), + y, + Unsafe.Add(ref zRef, 1)); + goto case 1; + } case 1: - { - dRef = TTernaryOperator.Invoke(xRef, y, zRef); - goto case 0; - } + { + dRef = TTernaryOperator.Invoke(xRef, y, zRef); + goto case 0; + } case 0: - { - break; - } + { + break; + } } } @@ -9518,77 +9518,77 @@ static void VectorizedSmall8(ref T xRef, T y, ref T zRef, ref T dRef, nuint rema case 7: case 6: case 5: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 yVec = Vector256.Create(y); + Vector256 yVec = Vector256.Create(y); - Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - yVec, - Vector256.LoadUnsafe(ref zRef)); - Vector256 end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), - yVec, - Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256.Count)); + Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + yVec, + Vector256.LoadUnsafe(ref zRef)); + Vector256 end = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef, remainder - (uint)Vector256.Count), + yVec, + Vector256.LoadUnsafe(ref zRef, remainder - (uint)Vector256.Count)); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector256.Count); - break; - } + break; + } case 4: - { - Debug.Assert(Vector256.IsHardwareAccelerated); + { + Debug.Assert(Vector256.IsHardwareAccelerated); - Vector256 beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), - Vector256.Create(y), - Vector256.LoadUnsafe(ref zRef)); - beg.StoreUnsafe(ref dRef); + Vector256 
beg = TTernaryOperator.Invoke(Vector256.LoadUnsafe(ref xRef), + Vector256.Create(y), + Vector256.LoadUnsafe(ref zRef)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 3: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 yVec = Vector128.Create(y); + Vector128 yVec = Vector128.Create(y); - Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - yVec, - Vector128.LoadUnsafe(ref zRef)); - Vector128 end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), - yVec, - Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128.Count)); + Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + yVec, + Vector128.LoadUnsafe(ref zRef)); + Vector128 end = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef, remainder - (uint)Vector128.Count), + yVec, + Vector128.LoadUnsafe(ref zRef, remainder - (uint)Vector128.Count)); - beg.StoreUnsafe(ref dRef); - end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); + beg.StoreUnsafe(ref dRef); + end.StoreUnsafe(ref dRef, remainder - (uint)Vector128.Count); - break; - } + break; + } case 2: - { - Debug.Assert(Vector128.IsHardwareAccelerated); + { + Debug.Assert(Vector128.IsHardwareAccelerated); - Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), - Vector128.Create(y), - Vector128.LoadUnsafe(ref zRef)); - beg.StoreUnsafe(ref dRef); + Vector128 beg = TTernaryOperator.Invoke(Vector128.LoadUnsafe(ref xRef), + Vector128.Create(y), + Vector128.LoadUnsafe(ref zRef)); + beg.StoreUnsafe(ref dRef); - break; - } + break; + } case 1: - { - dRef = TTernaryOperator.Invoke(xRef, y, zRef); - goto case 0; - } + { + dRef = TTernaryOperator.Invoke(xRef, y, zRef); + goto case 0; + } case 0: - { - break; - } + { + break; + } } } } From 31458d357ff5f477e0e5a44b4a5ea6fb6fe9a73b Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Fri, 26 Jan 2024 21:42:56 -0500 Subject: [PATCH 3/4] 
Add comment --- .../System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs index 16555a4e98f29e..f275e56e317f43 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs @@ -3047,6 +3047,7 @@ private static void InvokeSpanIntoSpan( if (typeof(TInput) == typeof(TOutput)) { + // This ignores the unsafe case where a developer passes in overlapping spans for distinct types. ValidateInputOutputSpanNonOverlapping(x, Rename(destination)); } From 25be378143c5d6bc83204acb834095a2d58142e8 Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Sat, 27 Jan 2024 14:53:11 -0500 Subject: [PATCH 4/4] Fix failures --- .../System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs index f275e56e317f43..fec346b381913f 100644 --- a/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs +++ b/src/libraries/System.Numerics.Tensors/src/System/Numerics/Tensors/netcore/TensorPrimitives.netcore.cs @@ -4022,7 +4022,7 @@ private static void InvokeSpanIntoSpan_2to1( Debug.Assert(Vector128.IsSupported); Debug.Assert(Vector128.IsSupported); - twoVectorsFromEnd = x.Length - (Vector256.Count * 2); + twoVectorsFromEnd = x.Length - (Vector128.Count * 2); if (i <= twoVectorsFromEnd) { // Loop handling two input vectors / one output vector at a time.