-
Couldn't load subscription status.
- Fork 5.2k
Open
Labels
api-approved (API was approved in API review, it can be implemented), area-System.Runtime.Intrinsics, avx512 (Related to the AVX-512 architecture)
Milestone
Description
Background and motivation
Both VPOPCNTDQ and BITALG are supported by Intel in the Ice Lake and newer architectures, and by AMD in Zen 4.
VPOPCNTDQ allows for parallel popcnt in either Vector128, Vector256, or Vector512 for ulong and uint.
BITALG expands parallel popcnt to ushort and byte, and it also adds the VPSHUFBITQMB instruction, which performs a bit gather select.
VPOPCNTW is highly beneficial for my current project: it lets me count the filled blocks row by row on the bitboard of a block game.
API Proposal
namespace System.Runtime.Intrinsics.X86;
/// <summary>
/// Proposed API surface for the AVX-512 VPOPCNTDQ instruction set, which provides a
/// parallel population count over 32-bit and 64-bit integer elements.
/// </summary>
/// <remarks>
/// This is an API-review sketch: members are listed as signatures only, without bodies.
/// </remarks>
[Intrinsic]
[CLSCompliant(false)]
public abstract class Avx512VPopcntDQ : Avx512DQ
{
    // Hides the IsSupported member inherited from Avx512DQ.
    public static new bool IsSupported { get; }

    /// <summary>Nested class paired with <c>Avx512DQ.X64</c>; it declares only a support flag.</summary>
    [Intrinsic]
    public new abstract class X64 : Avx512DQ.X64
    {
        internal X64() { }

        public static new bool IsSupported { get; }
    }

    // 512-bit population count, one overload per signed/unsigned 32-bit and 64-bit element type.
    public static Vector512<int> PopCount(Vector512<int> value);
    public static Vector512<uint> PopCount(Vector512<uint> value);
    public static Vector512<long> PopCount(Vector512<long> value);
    public static Vector512<ulong> PopCount(Vector512<ulong> value);

    /// <summary>Nested class exposing the 256-bit and 128-bit forms of the population count.</summary>
    // Declared with [Intrinsic], `new`, and an internal constructor so that it matches the
    // sibling X64 class; `new` is required because this type hides the VL class inherited
    // from Avx512DQ.
    [Intrinsic]
    public new abstract class VL : Avx512DQ.VL
    {
        internal VL() { }

        public static new bool IsSupported { get; }

        // 256-bit overloads.
        public static Vector256<int> PopCount(Vector256<int> value);
        public static Vector256<uint> PopCount(Vector256<uint> value);
        public static Vector256<long> PopCount(Vector256<long> value);
        public static Vector256<ulong> PopCount(Vector256<ulong> value);

        // 128-bit overloads.
        public static Vector128<int> PopCount(Vector128<int> value);
        public static Vector128<uint> PopCount(Vector128<uint> value);
        public static Vector128<long> PopCount(Vector128<long> value);
        public static Vector128<ulong> PopCount(Vector128<ulong> value);
    }
}
/// <summary>
/// Proposed API surface for the AVX-512 BITALG instruction set, which extends the parallel
/// population count to 16-bit and 8-bit elements and adds the VPSHUFBITQMB bit gather select.
/// </summary>
/// <remarks>
/// This is an API-review sketch: members are listed as signatures only, without bodies.
/// </remarks>
[Intrinsic]
[CLSCompliant(false)]
public abstract class Avx512BitAlg : Avx512BW
{
    // Hides the IsSupported member inherited from Avx512BW.
    public static new bool IsSupported { get; }

    /// <summary>Nested class paired with <c>Avx512BW.X64</c>; it declares only a support flag.</summary>
    [Intrinsic]
    public new abstract class X64 : Avx512BW.X64
    {
        internal X64() { }

        public static new bool IsSupported { get; }
    }

    // 512-bit population count, one overload per signed/unsigned 16-bit and 8-bit element type.
    public static Vector512<short> PopCount(Vector512<short> value);
    public static Vector512<ushort> PopCount(Vector512<ushort> value);
    public static Vector512<byte> PopCount(Vector512<byte> value);
    public static Vector512<sbyte> PopCount(Vector512<sbyte> value);

    // 512-bit bit gather select (VPSHUFBITQMB).
    public static Vector512<byte> ShuffleBits(Vector512<ulong> value, Vector512<byte> control);
    public static Vector512<sbyte> ShuffleBits(Vector512<long> value, Vector512<sbyte> control);

    // NOTE(review): presumably the masked form of ShuffleBits; the exact semantics of `mask`
    // are not spelled out in this proposal and should be confirmed.
    public static Vector512<byte> MaskShuffleBits(Vector512<byte> mask, Vector512<ulong> value, Vector512<byte> control);
    public static Vector512<sbyte> MaskShuffleBits(Vector512<sbyte> mask, Vector512<long> value, Vector512<sbyte> control);

    /// <summary>Nested class exposing the 256-bit and 128-bit forms of the operations above.</summary>
    // Declared with [Intrinsic], `new`, and an internal constructor so that it matches the
    // sibling X64 class; `new` is required because this type hides the VL class inherited
    // from Avx512BW.
    [Intrinsic]
    public new abstract class VL : Avx512BW.VL
    {
        internal VL() { }

        public static new bool IsSupported { get; }

        // 256-bit population count.
        public static Vector256<short> PopCount(Vector256<short> value);
        public static Vector256<ushort> PopCount(Vector256<ushort> value);
        public static Vector256<byte> PopCount(Vector256<byte> value);
        public static Vector256<sbyte> PopCount(Vector256<sbyte> value);

        // 128-bit population count.
        public static Vector128<short> PopCount(Vector128<short> value);
        public static Vector128<ushort> PopCount(Vector128<ushort> value);
        public static Vector128<byte> PopCount(Vector128<byte> value);
        public static Vector128<sbyte> PopCount(Vector128<sbyte> value);

        // 256-bit bit gather select and its masked counterpart.
        public static Vector256<byte> ShuffleBits(Vector256<ulong> value, Vector256<byte> control);
        public static Vector256<sbyte> ShuffleBits(Vector256<long> value, Vector256<sbyte> control);
        public static Vector256<byte> MaskShuffleBits(Vector256<byte> mask, Vector256<ulong> value, Vector256<byte> control);
        public static Vector256<sbyte> MaskShuffleBits(Vector256<sbyte> mask, Vector256<long> value, Vector256<sbyte> control);

        // 128-bit bit gather select and its masked counterpart.
        public static Vector128<byte> ShuffleBits(Vector128<ulong> value, Vector128<byte> control);
        public static Vector128<sbyte> ShuffleBits(Vector128<long> value, Vector128<sbyte> control);
        public static Vector128<byte> MaskShuffleBits(Vector128<byte> mask, Vector128<ulong> value, Vector128<byte> control);
        public static Vector128<sbyte> MaskShuffleBits(Vector128<sbyte> mask, Vector128<long> value, Vector128<sbyte> control);
    }
}
API Usage
var blocksPerRows = Avx512BitAlg.PopCount(board);
Alternative Designs
`Avx512VPopcntDQ` could have a different name. `MaskShuffleBits` could have a different name and/or different parameter/return types (e.g. `mask` and the return type could be `ulong` instead of `Vector512<byte>`).
Risks
None
MichalPetryka, PaulusParssinen, saucecontrol, Mrnikbobjeff, miyu and 2 more
Metadata
Metadata
Assignees
Labels
api-approved (API was approved in API review, it can be implemented), area-System.Runtime.Intrinsics, avx512 (Related to the AVX-512 architecture)