diff --git a/src/coreclr/jit/codegenarm64.cpp b/src/coreclr/jit/codegenarm64.cpp index 1ba4369e342368..c6a1a0f85b0004 100644 --- a/src/coreclr/jit/codegenarm64.cpp +++ b/src/coreclr/jit/codegenarm64.cpp @@ -2338,6 +2338,50 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre break; } + + case GT_CNS_MSK: + { + GenTreeMskCon* mask = tree->AsMskCon(); + emitter* emit = GetEmitter(); + + // Try every type until a match is found + + if (mask->IsZero()) + { + emit->emitInsSve_R(INS_sve_pfalse, EA_SCALABLE, targetReg, INS_OPTS_SCALABLE_B); + break; + } + + insOpts opt = INS_OPTS_SCALABLE_B; + SveMaskPattern pat = EvaluateSimdMaskToPattern(TYP_BYTE, mask->gtSimdMaskVal); + + if (pat == SveMaskPatternNone) + { + opt = INS_OPTS_SCALABLE_H; + pat = EvaluateSimdMaskToPattern(TYP_SHORT, mask->gtSimdMaskVal); + } + + if (pat == SveMaskPatternNone) + { + opt = INS_OPTS_SCALABLE_S; + pat = EvaluateSimdMaskToPattern(TYP_INT, mask->gtSimdMaskVal); + } + + if (pat == SveMaskPatternNone) + { + opt = INS_OPTS_SCALABLE_D; + pat = EvaluateSimdMaskToPattern(TYP_LONG, mask->gtSimdMaskVal); + } + + // Should only ever create constant masks for valid patterns. + if (pat == SveMaskPatternNone) + { + unreached(); + } + + emit->emitIns_R_PATTERN(INS_sve_ptrue, EA_SCALABLE, targetReg, opt, (insSvePattern)pat); + break; + } #endif // FEATURE_SIMD default: diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 83b7288075f6f4..1810248529fbaf 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3146,8 +3146,8 @@ class Compiler var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize); #if defined(TARGET_ARM64) - GenTree* gtNewSimdAllTrueMaskNode(CorInfoType simdBaseJitType, unsigned simdSize); - GenTree* gtNewSimdFalseMaskByteNode(unsigned simdSize); + GenTree* gtNewSimdAllTrueMaskNode(CorInfoType simdBaseJitType); + GenTree* gtNewSimdFalseMaskByteNode(); #endif GenTree* gtNewSimdBinOpNode(genTreeOps op, @@ -3715,6 +3715,7 @@ class Compiler #if defined(FEATURE_HW_INTRINSICS) GenTree* gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree); + GenTreeMskCon* gtFoldExprConvertVecCnsToMask(GenTreeHWIntrinsic* tree, GenTreeVecCon* vecCon); #endif // FEATURE_HW_INTRINSICS // Options to control behavior of gtTryRemoveBoxUpstreamEffects diff --git a/src/coreclr/jit/gentree.cpp b/src/coreclr/jit/gentree.cpp index ede13bb5ee7f3b..ae3a96765bd38e 100644 --- a/src/coreclr/jit/gentree.cpp +++ b/src/coreclr/jit/gentree.cpp @@ -21945,8 +21945,8 @@ GenTree* Compiler::gtNewSimdCvtVectorToMaskNode(var_types type, #if defined(TARGET_XARCH) return gtNewSimdHWIntrinsicNode(TYP_MASK, op1, NI_AVX512_ConvertVectorToMask, simdBaseJitType, simdSize); #elif defined(TARGET_ARM64) - // We use cmpne which requires an embedded mask. - GenTree* trueMask = gtNewSimdAllTrueMaskNode(simdBaseJitType, simdSize); + // ConvertVectorToMask uses cmpne which requires an embedded mask. 
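The GT_CNS_MSK case above probes each SVE lane width in turn (B, H, S, D) and keeps the first interpretation for which EvaluateSimdMaskToPattern recognises a valid ptrue pattern. The following is a minimal standalone sketch of that probing loop, assuming a 128-bit vector length, the simdmask_t layout of one predicate bit per vector byte, and simplified so that only the ALL pattern (an unbroken run covering every element) is recognised; the helper name is illustrative, not the JIT's.

#include <cstdint>
#include <cstdio>

// Returns the number of leading true elements if the predicate is an unbroken
// run of 1-elements followed only by 0-elements, otherwise -1.
static int LeadingTrueElements(uint64_t mask, int elemSize, int byteLength)
{
    int count = byteLength / elemSize;
    int run   = count;
    for (int i = 0; i < count; i++)
    {
        if (((mask >> (i * elemSize)) & 1) == 0) { run = i; break; }
    }
    for (int i = run; i < count; i++)
    {
        if (((mask >> (i * elemSize)) & 1) != 0) { return -1; } // broken sequence
    }
    return run;
}

int main()
{
    // One bit set every 4th position: an all-true predicate for 32-bit lanes.
    uint64_t mask = 0x1111;

    // Probe B, H, S, D in that order, as the GT_CNS_MSK codegen does.
    const int sizes[4] = {1, 2, 4, 8};
    for (int k = 0; k < 4; k++)
    {
        int n = LeadingTrueElements(mask, sizes[k], 16);
        if (n == 16 / sizes[k])
        {
            printf("emit ptrue, element size %d, pattern ALL\n", sizes[k]);
            break;
        }
    }
    return 0;
}

For 0x1111 the byte and halfword probes fail (the bit sequence is broken at those widths) and the word probe succeeds, so a single ptrue with the S element size covers the constant; the real codegen additionally maps shorter runs to the VL1..VL8 patterns.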
+ GenTree* trueMask = gtNewSimdHWIntrinsicNode(TYP_MASK, NI_Sve_ConversionTrueMask, simdBaseJitType, simdSize); return gtNewSimdHWIntrinsicNode(TYP_MASK, trueMask, op1, NI_Sve_ConvertVectorToMask, simdBaseJitType, simdSize); #else #error Unsupported platform @@ -31669,6 +31669,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) } #if defined(FEATURE_MASKED_HW_INTRINSICS) + // Fold ConvertMaskToVector(ConvertVectorToMask(vec)) to vec if (tree->OperIsConvertMaskToVector()) { GenTree* op = op1; @@ -31701,6 +31702,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) } } + // Fold ConvertVectorToMask(ConvertMaskToVector(mask)) to mask if (tree->OperIsConvertVectorToMask()) { GenTree* op = op1; @@ -31709,11 +31711,9 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) #if defined(TARGET_XARCH) tryHandle = op->OperIsHWIntrinsic(); #elif defined(TARGET_ARM64) - if (op->OperIsHWIntrinsic(NI_Sve_CreateTrueMaskAll)) - { - op = op2; - tryHandle = op->OperIsHWIntrinsic(); - } + assert(op->OperIsHWIntrinsic(NI_Sve_ConversionTrueMask)); + op = op2; + tryHandle = op->OperIsHWIntrinsic(); #endif // TARGET_ARM64 if (tryHandle) @@ -31799,53 +31799,12 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) resultNode = gtNewVconNode(retType, &simdVal); } +#if defined(TARGET_XARCH) else if (tree->OperIsConvertVectorToMask()) { - GenTreeVecCon* vecCon = cnsNode->AsVecCon(); - GenTreeMskCon* mskCon = gtNewMskConNode(retType); - - switch (vecCon->TypeGet()) - { - case TYP_SIMD8: - { - EvaluateSimdCvtVectorToMask(simdBaseType, &mskCon->gtSimdMaskVal, vecCon->gtSimd8Val); - break; - } - - case TYP_SIMD12: - { - EvaluateSimdCvtVectorToMask(simdBaseType, &mskCon->gtSimdMaskVal, vecCon->gtSimd12Val); - break; - } - - case TYP_SIMD16: - { - EvaluateSimdCvtVectorToMask(simdBaseType, &mskCon->gtSimdMaskVal, vecCon->gtSimd16Val); - break; - } - -#if defined(TARGET_XARCH) - case TYP_SIMD32: - { - EvaluateSimdCvtVectorToMask(simdBaseType, &mskCon->gtSimdMaskVal, vecCon->gtSimd32Val); - break; - } - - case TYP_SIMD64: - { - EvaluateSimdCvtVectorToMask(simdBaseType, &mskCon->gtSimdMaskVal, vecCon->gtSimd64Val); - break; - } -#endif // TARGET_XARCH - - default: - { - unreached(); - } - } - - resultNode = mskCon; + resultNode = gtFoldExprConvertVecCnsToMask(tree, cnsNode->AsVecCon()); } +#endif // TARGET_XARCH #endif // FEATURE_MASKED_HW_INTRINSICS else { @@ -32688,6 +32647,10 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) switch (ni) { #ifdef TARGET_ARM64 + case NI_Sve_ConvertVectorToMask: + resultNode = gtFoldExprConvertVecCnsToMask(tree, cnsNode->AsVecCon()); + break; + case NI_AdvSimd_MultiplyByScalar: case NI_AdvSimd_Arm64_MultiplyByScalar: { @@ -32829,7 +32792,18 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) break; } - if (op1->IsVectorAllBitsSet() || op1->IsMaskAllBitsSet()) +#if defined(TARGET_ARM64) + if (ni == NI_Sve_ConditionalSelect) + { + assert(!op1->IsVectorAllBitsSet() && !op1->IsVectorZero()); + } + else + { + assert(!op1->IsTrueMask(simdBaseType) && !op1->IsFalseMask()); + } +#endif + + if (op1->IsVectorAllBitsSet() || op1->IsTrueMask(simdBaseType)) { if ((op3->gtFlags & GTF_SIDE_EFFECT) != 0) { @@ -32843,7 +32817,7 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) return op2; } - if (op1->IsVectorZero()) + if (op1->IsVectorZero() || op1->IsFalseMask()) { return gtWrapWithSideEffects(op3, op2, GTF_ALL_EFFECT); } @@ -32895,6 +32869,70 @@ GenTree* 
Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree) } return resultNode; } + +//------------------------------------------------------------------------------ +// gtFoldExprConvertVecCnsToMask: Folds a constant vector plus conversion to +// mask into a constant mask. +// +// Arguments: +// tree - The convert vector to mask node +// vecCon - The vector constant converted by the convert +// +// Return Value: +// Returns a constant mask +// +GenTreeMskCon* Compiler::gtFoldExprConvertVecCnsToMask(GenTreeHWIntrinsic* tree, GenTreeVecCon* vecCon) +{ + assert(tree->OperIsConvertVectorToMask()); + assert(vecCon == tree->Op(1) || vecCon == tree->Op(2)); + + var_types retType = tree->TypeGet(); + var_types simdBaseType = tree->GetSimdBaseType(); + GenTreeMskCon* mskCon = gtNewMskConNode(retType); + + switch (vecCon->TypeGet()) + { + case TYP_SIMD8: + { + EvaluateSimdCvtVectorToMask(simdBaseType, &mskCon->gtSimdMaskVal, vecCon->gtSimd8Val); + break; + } + + case TYP_SIMD12: + { + EvaluateSimdCvtVectorToMask(simdBaseType, &mskCon->gtSimdMaskVal, vecCon->gtSimd12Val); + break; + } + + case TYP_SIMD16: + { + EvaluateSimdCvtVectorToMask(simdBaseType, &mskCon->gtSimdMaskVal, vecCon->gtSimd16Val); + break; + } + +#if defined(TARGET_XARCH) + case TYP_SIMD32: + { + EvaluateSimdCvtVectorToMask(simdBaseType, &mskCon->gtSimdMaskVal, vecCon->gtSimd32Val); + break; + } + + case TYP_SIMD64: + { + EvaluateSimdCvtVectorToMask(simdBaseType, &mskCon->gtSimdMaskVal, vecCon->gtSimd64Val); + break; + } +#endif // TARGET_XARCH + + default: + { + unreached(); + } + } + + return mskCon; +} + #endif // FEATURE_HW_INTRINSICS //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/gentree.h b/src/coreclr/jit/gentree.h index 5e425db7271d93..c5d49fbacfca3a 100644 --- a/src/coreclr/jit/gentree.h +++ b/src/coreclr/jit/gentree.h @@ -1802,8 +1802,8 @@ struct GenTree inline bool IsVectorCreate() const; inline bool IsVectorAllBitsSet() const; inline bool IsVectorBroadcast(var_types simdBaseType) const; - inline bool IsMaskAllBitsSet() const; - inline bool IsMaskZero() const; + inline bool IsTrueMask(var_types simdBaseType) const; + inline bool IsFalseMask() const; inline uint64_t GetIntegralVectorConstElement(size_t index, var_types simdBaseType); @@ -9550,54 +9550,46 @@ inline bool GenTree::IsVectorBroadcast(var_types simdBaseType) const return false; } -inline bool GenTree::IsMaskAllBitsSet() const +//------------------------------------------------------------------------ +// IsTrueMask: Is the given node a true mask +// +// Arguments: +// simdBaseType - the base type of the mask +// +// Returns true if the node is a true mask for the given simdBaseType. +// +// Note that a byte true mask (1111...) is different to an int true mask +// (10001000...), therefore the simdBaseType of the mask needs to be +// taken into account. 
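To make that layout difference concrete: simdmask_t reserves one bit per byte of the vector, and an element is true when the bit for its first byte is set, so the elements of a predicate sit sizeof(baseType) bits apart. A small sketch, assuming a 128-bit vector length, of what an all-true predicate looks like at each element size:

#include <cstdint>
#include <cstdio>

// Build the bit pattern for "all elements true" in a 16-byte vector:
// one bit per element, spaced elemSize bits apart.
static uint64_t AllTrueMaskBits(int elemSize)
{
    uint64_t bits = 0;
    for (int i = 0; i < 16 / elemSize; i++)
    {
        bits |= uint64_t(1) << (i * elemSize);
    }
    return bits;
}

int main()
{
    printf("byte  all-true: 0x%04llx\n", (unsigned long long)AllTrueMaskBits(1)); // 0xffff
    printf("short all-true: 0x%04llx\n", (unsigned long long)AllTrueMaskBits(2)); // 0x5555
    printf("int   all-true: 0x%04llx\n", (unsigned long long)AllTrueMaskBits(4)); // 0x1111
    printf("long  all-true: 0x%04llx\n", (unsigned long long)AllTrueMaskBits(8)); // 0x0101
    return 0;
}

The same constant 0xffff is therefore a true mask for byte lanes but not for int lanes, which is why IsTrueMask has to take simdBaseType while IsFalseMask (all bits zero) does not.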
+// +inline bool GenTree::IsTrueMask(var_types simdBaseType) const { #ifdef TARGET_ARM64 - static_assert_no_msg(AreContiguous(NI_Sve_CreateTrueMaskByte, NI_Sve_CreateTrueMaskDouble, - NI_Sve_CreateTrueMaskInt16, NI_Sve_CreateTrueMaskInt32, - NI_Sve_CreateTrueMaskInt64, NI_Sve_CreateTrueMaskSByte, - NI_Sve_CreateTrueMaskSingle, NI_Sve_CreateTrueMaskUInt16, - NI_Sve_CreateTrueMaskUInt32, NI_Sve_CreateTrueMaskUInt64)); + // TODO-SVE: For agnostic VL, vector type may not be simd16_t - if (OperIsHWIntrinsic()) + if (IsCnsMsk()) { - NamedIntrinsic id = AsHWIntrinsic()->GetHWIntrinsicId(); - if (id == NI_Sve_ConvertMaskToVector) - { - GenTree* op1 = AsHWIntrinsic()->Op(1); - assert(op1->OperIsHWIntrinsic()); - id = op1->AsHWIntrinsic()->GetHWIntrinsicId(); - } - return ((id == NI_Sve_CreateTrueMaskAll) || - ((id >= NI_Sve_CreateTrueMaskByte) && (id <= NI_Sve_CreateTrueMaskUInt64))); + return SveMaskPatternAll == EvaluateSimdMaskToPattern(simdBaseType, AsMskCon()->gtSimdMaskVal); } - #endif + return false; } -inline bool GenTree::IsMaskZero() const +//------------------------------------------------------------------------ +// IsFalseMask: Is the given node a false mask +// +// Returns true if the node is a false mask, ie all zeros +// +inline bool GenTree::IsFalseMask() const { #ifdef TARGET_ARM64 - static_assert_no_msg(AreContiguous(NI_Sve_CreateFalseMaskByte, NI_Sve_CreateFalseMaskDouble, - NI_Sve_CreateFalseMaskInt16, NI_Sve_CreateFalseMaskInt32, - NI_Sve_CreateFalseMaskInt64, NI_Sve_CreateFalseMaskSByte, - NI_Sve_CreateFalseMaskSingle, NI_Sve_CreateFalseMaskUInt16, - NI_Sve_CreateFalseMaskUInt32, NI_Sve_CreateFalseMaskUInt64)); - - if (OperIsHWIntrinsic()) + if (IsCnsMsk()) { - NamedIntrinsic id = AsHWIntrinsic()->GetHWIntrinsicId(); - if (id == NI_Sve_ConvertMaskToVector) - { - GenTree* op1 = AsHWIntrinsic()->Op(1); - assert(op1->OperIsHWIntrinsic()); - id = op1->AsHWIntrinsic()->GetHWIntrinsicId(); - } - return ((id >= NI_Sve_CreateFalseMaskByte) && (id <= NI_Sve_CreateFalseMaskUInt64)); + return AsMskCon()->IsZero(); } - #endif + return false; } diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index dfd3151c310bb9..d13f6fcf38aa26 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -2454,6 +2454,14 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, { retNode = impSpecialIntrinsic(intrinsic, clsHnd, method, sig R2RARG(entryPoint), simdBaseJitType, nodeRetType, simdSize, mustExpand); + +#if defined(FEATURE_MASKED_HW_INTRINSICS) && defined(TARGET_ARM64) + if (retNode != nullptr) + { + // The special import may have switched the type of the node. + nodeRetType = retNode->gtType; + } +#endif } if (setMethodHandle && (retNode != nullptr)) @@ -2526,18 +2534,10 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, } } - if (retType != nodeRetType) + if (nodeRetType == TYP_MASK) { // HWInstrinsic returns a mask, but all returns must be vectors, so convert mask to vector. 
- assert(HWIntrinsicInfo::ReturnsPerElementMask(intrinsic)); - assert(nodeRetType == TYP_MASK); - - GenTreeHWIntrinsic* op = retNode->AsHWIntrinsic(); - - CorInfoType simdBaseJitType = op->GetSimdBaseJitType(); - unsigned simdSize = op->GetSimdSize(); - - retNode = gtNewSimdCvtMaskToVectorNode(retType, op, simdBaseJitType, simdSize); + retNode = gtNewSimdCvtMaskToVectorNode(retType, retNode, simdBaseJitType, simdSize); } #endif // FEATURE_MASKED_HW_INTRINSICS && TARGET_ARM64 diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 083311812ecac2..ddea76853da28c 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -2780,6 +2780,56 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Sve_CreateFalseMaskByte: + case NI_Sve_CreateFalseMaskDouble: + case NI_Sve_CreateFalseMaskInt16: + case NI_Sve_CreateFalseMaskInt32: + case NI_Sve_CreateFalseMaskInt64: + case NI_Sve_CreateFalseMaskSByte: + case NI_Sve_CreateFalseMaskSingle: + case NI_Sve_CreateFalseMaskUInt16: + case NI_Sve_CreateFalseMaskUInt32: + case NI_Sve_CreateFalseMaskUInt64: + { + // Import as a constant vector 0 + GenTreeVecCon* vecCon = gtNewVconNode(retType); + vecCon->gtSimdVal = simd_t::Zero(); + retNode = vecCon; + break; + } + + case NI_Sve_CreateTrueMaskByte: + case NI_Sve_CreateTrueMaskDouble: + case NI_Sve_CreateTrueMaskInt16: + case NI_Sve_CreateTrueMaskInt32: + case NI_Sve_CreateTrueMaskInt64: + case NI_Sve_CreateTrueMaskSByte: + case NI_Sve_CreateTrueMaskSingle: + case NI_Sve_CreateTrueMaskUInt16: + case NI_Sve_CreateTrueMaskUInt32: + case NI_Sve_CreateTrueMaskUInt64: + { + assert(sig->numArgs == 1); + op1 = impPopStack().val; + + // Where possible, import a constant mask to allow for optimisations. 
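As an example of that import, with a 128-bit vector length Sve.CreateTrueMaskInt32(SveMaskPattern.VectorCount2) can fold to the constant vector whose first two int lanes are all-bits-set and whose remaining lanes are zero. A minimal sketch of that expansion, assuming the fixed VectorCount1..8 patterns only; the function name is illustrative rather than the JIT's EvaluateSimdPatternToVector.

#include <cstdint>
#include <cstring>
#include <cstdio>

// Expand an SVE "first n elements" pattern into a 16-byte vector constant:
// the first n elements of size elemSize are all-bits-set, the rest are zero.
static void PatternToVector(uint8_t result[16], int elemSize, int n)
{
    int count = 16 / elemSize;
    for (int i = 0; i < count; i++)
    {
        memset(result + i * elemSize, (i < n) ? 0xFF : 0x00, elemSize);
    }
}

int main()
{
    uint8_t vec[16];
    PatternToVector(vec, 4, 2); // int lanes, SveMaskPattern.VectorCount2
    for (int i = 0; i < 16; i++)
    {
        printf("%02x%s", vec[i], (i % 4 == 3) ? " " : "");
    }
    printf("\n"); // ffffffff ffffffff 00000000 00000000
    return 0;
}

Importing the constant vector (rather than a ptrue node) is what lets the later fold of ConvertVectorToMask over a constant vector produce a GenTreeMskCon and expose the mask to the usual constant-based optimisations.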
+ if (op1->IsIntegralConst()) + { + int64_t pattern = op1->AsIntConCommon()->IntegralValue(); + simd_t simdVal; + + if (EvaluateSimdPatternToVector(simdBaseType, &simdVal, (SveMaskPattern)pattern)) + { + retNode = gtNewVconNode(retType, &simdVal); + break; + } + } + + // Was not able to generate a pattern, instead import a truemaskall + retNode = gtNewSimdHWIntrinsicNode(TYP_MASK, op1, intrinsic, simdBaseJitType, simdSize); + break; + } + case NI_Sve_Load2xVectorAndUnzip: case NI_Sve_Load3xVectorAndUnzip: case NI_Sve_Load4xVectorAndUnzip: @@ -3297,32 +3347,41 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } //------------------------------------------------------------------------ -// gtNewSimdAllTrueMaskNode: Create an embedded mask with all bits set to true +// gtNewSimdAllTrueMaskNode: Create a mask with all bits set to true // // Arguments: // simdBaseJitType -- the base jit type of the nodes being masked -// simdSize -- the simd size of the nodes being masked // // Return Value: // The mask // -GenTree* Compiler::gtNewSimdAllTrueMaskNode(CorInfoType simdBaseJitType, unsigned simdSize) +GenTree* Compiler::gtNewSimdAllTrueMaskNode(CorInfoType simdBaseJitType) { - return gtNewSimdHWIntrinsicNode(TYP_MASK, NI_Sve_CreateTrueMaskAll, simdBaseJitType, simdSize); + // Import as a constant mask + + var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType); + GenTreeMskCon* mskCon = gtNewMskConNode(TYP_MASK); + + // TODO-SVE: For agnostic VL, vector type may not be simd16_t + + bool found = EvaluateSimdPatternToMask(simdBaseType, &mskCon->gtSimdMaskVal, SveMaskPatternAll); + assert(found); + + return mskCon; } //------------------------------------------------------------------------ -// gtNewSimdFalseMaskByteNode: Create an embedded mask with all bits set to false -// -// Arguments: -// simdSize -- the simd size of the nodes being masked +// gtNewSimdFalseMaskByteNode: Create a mask with all bits set to false // // Return Value: // The mask // -GenTree* Compiler::gtNewSimdFalseMaskByteNode(unsigned simdSize) +GenTree* Compiler::gtNewSimdFalseMaskByteNode() { - return gtNewSimdHWIntrinsicNode(TYP_MASK, NI_Sve_CreateFalseMaskByte, CORINFO_TYPE_UBYTE, simdSize); + // Import as a constant mask 0 + GenTreeMskCon* mskCon = gtNewMskConNode(TYP_MASK); + mskCon->gtSimdMaskVal = simdmask_t::Zero(); + return mskCon; } #endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index e83fb55e1a18c5..f4161665defa43 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -512,7 +512,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) // destination using /Z. 
assert((targetReg != embMaskOp2Reg) || (embMaskOp1Reg == embMaskOp2Reg)); - assert(intrin.op3->isContained() || !intrin.op1->IsMaskAllBitsSet()); + assert(intrin.op3->isContained() || !intrin.op1->IsTrueMask(node->GetSimdBaseType())); GetEmitter()->emitInsSve_R_R_R(INS_sve_movprfx, emitSize, targetReg, maskReg, embMaskOp1Reg, opt); } else @@ -610,7 +610,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) { assert(intrin.op3->IsVectorZero()); - if (intrin.op1->isContained() || intrin.op1->IsMaskAllBitsSet()) + if (intrin.op1->isContained() || intrin.op1->IsTrueMask(node->GetSimdBaseType())) { // We already skip importing ConditionalSelect if op1 == trueAll, however // if we still see it here, it is because we wrapped the predicated instruction @@ -2033,7 +2033,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case NI_Sve_CreateTrueMaskAll: + case NI_Sve_ConversionTrueMask: // Must use the pattern variant, as the non-pattern varient is SVE2.1. GetEmitter()->emitIns_R_PATTERN(ins, emitSize, targetReg, opt, SVE_PATTERN_ALL); break; diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 47c73db897ad9e..6646981f7b33dd 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -63,28 +63,28 @@ HARDWARE_INTRINSIC(Sve, CreateBreakAfterPropagateMask, HARDWARE_INTRINSIC(Sve, CreateBreakBeforeMask, -1, 2, {INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_sve_brkb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen) HARDWARE_INTRINSIC(Sve, CreateBreakBeforePropagateMask, -1, 3, {INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_sve_brkpb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen|HW_Flag_ZeroingMaskedOperation) HARDWARE_INTRINSIC(Sve, CreateBreakPropagateMask, -1, -1, {INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_sve_brkn, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_HasRMWSemantics|HW_Flag_ZeroingMaskedOperation) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskByte, -1, 0, {INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskDouble, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskInt16, -1, 0, {INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskInt32, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskInt64, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskSByte, -1, 0, {INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskSingle, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskUInt16, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskUInt32, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateFalseMaskUInt64, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskByte, -1, 0, {INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskDouble, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskInt16, -1, 0, {INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskInt32, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskInt64, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskSByte, -1, 0, {INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskSingle, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskUInt16, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskUInt32, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateFalseMaskUInt64, -1, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_pfalse, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport) HARDWARE_INTRINSIC(Sve, CreateMaskForFirstActiveElement, -1, 2, {INS_sve_pfirst, INS_sve_pfirst, INS_sve_pfirst, INS_sve_pfirst, INS_sve_pfirst, INS_sve_pfirst, INS_sve_pfirst, INS_sve_pfirst, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_SpecialCodeGen|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(Sve, CreateMaskForNextActiveElement, -1, 2, {INS_invalid, INS_sve_pnext, INS_invalid, INS_sve_pnext, INS_invalid, INS_sve_pnext, INS_invalid, INS_sve_pnext, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_HasRMWSemantics) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskByte, -1, 1, {INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskDouble, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskInt16, -1, 1, {INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskInt64, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskSByte, -1, 1, {INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskSingle, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt16, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt64, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskByte, -1, 1, {INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskDouble, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskInt16, -1, 1, {INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskInt64, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskSByte, -1, 1, {INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskSingle, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt16, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt32, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) +HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt64, -1, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialImport) HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask16Bit, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask32Bit, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask64Bit, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) @@ -340,7 +340,8 @@ HARDWARE_INTRINSIC(Sve, ConditionalExtractAfterLastActiveElementScalar HARDWARE_INTRINSIC(Sve, ConditionalExtractLastActiveElementScalar, 0, 3, {INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb, INS_sve_clastb}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, ConvertMaskToVector, -1, 1, {INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov, INS_sve_mov}, HW_Category_Helper, HW_Flag_Scalable) HARDWARE_INTRINSIC(Sve, ConvertVectorToMask, -1, 2, {INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_ReturnsPerElementMask|HW_Flag_LowMaskedOperation) -HARDWARE_INTRINSIC(Sve, CreateTrueMaskAll, -1, 0, {INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) +// True mask only used inside a ConvertVectorToMask +HARDWARE_INTRINSIC(Sve, ConversionTrueMask, -1, 0, {INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue, INS_sve_ptrue}, HW_Category_Helper, HW_Flag_Scalable|HW_Flag_ReturnsPerElementMask) // Scalar variants of Saturating*By*BitElementCount. There is 8bit versions as the generic version is scalar only. 
HARDWARE_INTRINSIC(Sve, SaturatingDecrementBy16BitElementCountScalar, 0, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqdech, INS_sve_uqdech, INS_sve_sqdech, INS_sve_uqdech, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(Sve, SaturatingDecrementBy32BitElementCountScalar, 0, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sqdecw, INS_sve_uqdecw, INS_sve_sqdecw, INS_sve_uqdecw, INS_invalid, INS_invalid}, HW_Category_Scalar, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport|HW_Flag_HasRMWSemantics) diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 81d35e2519d37c..1d69b329e760bd 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -1813,13 +1813,16 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) case NI_AdvSimd_FusedMultiplyAddScalar: LowerHWIntrinsicFusedMultiplyAddScalar(node); break; + case NI_Sve_ConditionalSelect: return LowerHWIntrinsicCndSel(node); + case NI_Sve_SetFfr: { StoreFFRValue(node); break; } + case NI_Sve_GetFfrByte: case NI_Sve_GetFfrInt16: case NI_Sve_GetFfrInt32: @@ -1968,7 +1971,7 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node) var_types simdType = Compiler::getSIMDTypeForSize(simdSize); bool foundUse = BlockRange().TryGetUse(node, &use); - GenTree* trueMask = comp->gtNewSimdAllTrueMaskNode(simdBaseJitType, simdSize); + GenTree* trueMask = comp->gtNewSimdAllTrueMaskNode(simdBaseJitType); GenTree* falseVal = comp->gtNewZeroConNode(simdType); var_types nodeType = simdType; @@ -3939,11 +3942,12 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) GenTree* op3 = intrin.op3; // Handle op1 - if (op1->IsVectorZero()) + if (op1->IsFalseMask()) { // When we are merging with zero, we can specialize // and avoid instantiating the vector constant. 
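The containment choices here, like the ConditionalSelect folds earlier in gentree.cpp, rely on the per-element semantics of SVE sel: each result element comes from op2 where the governing predicate element is true and from op3 where it is false, so an all-true mask reduces the select to op2 and an all-false mask reduces it to op3 (keeping only side effects of the dropped operand). A scalar sketch of that rule, assuming int elements; this is just the semantics being exploited, not the JIT code.

#include <cstdio>

// ConditionalSelect(mask, op2, op3): take op2 where the predicate element is
// true, op3 where it is false.
static void Sel(const bool* mask, const int* op2, const int* op3, int* result, int count)
{
    for (int i = 0; i < count; i++)
    {
        result[i] = mask[i] ? op2[i] : op3[i];
    }
}

int main()
{
    bool allTrue[4]  = {true, true, true, true};
    bool allFalse[4] = {false, false, false, false};
    int  op2[4] = {1, 2, 3, 4};
    int  op3[4] = {5, 6, 7, 8};
    int  res[4];

    Sel(allTrue, op2, op3, res, 4);  // result equals op2: the select folds away
    printf("%d %d %d %d\n", res[0], res[1], res[2], res[3]);

    Sel(allFalse, op2, op3, res, 4); // result equals op3
    printf("%d %d %d %d\n", res[0], res[1], res[2], res[3]);
    return 0;
}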
MakeSrcContained(node, op1); + LABELEDDISPTREERANGE("Contained false mask op1 in ConditionalSelect", BlockRange(), op1); } // Handle op2 @@ -3953,14 +3957,15 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) if (IsInvariantInRange(op2, node) && op2->isEmbeddedMaskingCompatibleHWIntrinsic()) { + bool contain = false; uint32_t maskSize = genTypeSize(node->GetSimdBaseType()); uint32_t operSize = genTypeSize(op2->AsHWIntrinsic()->GetSimdBaseType()); + if (maskSize == operSize) { // If the size of baseType of operation matches that of maskType, then contain // the operation - MakeSrcContained(node, op2); - op2->MakeEmbMaskOp(); + contain = true; } else { @@ -3979,10 +3984,16 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) uint32_t auxSize = genTypeSize(embOp->GetAuxiliaryType()); if (maskSize == auxSize) { - MakeSrcContained(node, op2); - op2->MakeEmbMaskOp(); + contain = true; } } + + if (contain) + { + MakeSrcContained(node, op2); + op2->MakeEmbMaskOp(); + LABELEDDISPTREERANGE("Contained op2 in ConditionalSelect", BlockRange(), node); + } } // Handle intrinsics with embedded masks and immediate operands @@ -3993,17 +4004,19 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) if (embOp->Op(2)->IsCnsIntOrI()) { MakeSrcContained(op2, embOp->Op(2)); + LABELEDDISPTREERANGE("Contained ShiftRight in ConditionalSelect", BlockRange(), op2); } } } // Handle op3 - if (op3->IsVectorZero() && op1->IsMaskAllBitsSet()) + if (op3->IsVectorZero() && op1->IsTrueMask(node->GetSimdBaseType()) && op2->IsEmbMaskOp()) { // When we are merging with zero, we can specialize // and avoid instantiating the vector constant. // Do this only if op1 was AllTrueMask MakeSrcContained(node, op3); + LABELEDDISPTREERANGE("Contained false mask op3 in ConditionalSelect", BlockRange(), op3); } break; @@ -4120,13 +4133,14 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* cndSelNode) // op3 is all zeros. Such a Csel operation is absorbed into the instruction when emitted. Skip this // optimisation when the nestedOp is a reduce operation. - if (nestedOp1->IsMaskAllBitsSet() && !HWIntrinsicInfo::IsReduceOperation(nestedOp2Id) && + if (nestedOp1->IsTrueMask(cndSelNode->GetSimdBaseType()) && + !HWIntrinsicInfo::IsReduceOperation(nestedOp2Id) && (!HWIntrinsicInfo::IsZeroingMaskedOperation(nestedOp2Id) || op3->IsVectorZero())) { GenTree* nestedOp2 = nestedCndSel->Op(2); GenTree* nestedOp3 = nestedCndSel->Op(3); - LABELEDDISPTREERANGE("Removed nested conditionalselect (before):", BlockRange(), cndSelNode); + LABELEDDISPTREERANGE("Removed nested conditionalselect (before)", BlockRange(), cndSelNode); // Transform: // @@ -4144,7 +4158,7 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* cndSelNode) } } } - else if (op1->IsMaskAllBitsSet()) + else if (op1->IsTrueMask(cndSelNode->GetSimdBaseType())) { // Any case where op2 is not an embedded HWIntrinsic if (!op2->OperIsHWIntrinsic() || diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 65f339748c0c22..9af815bf726c7d 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -9727,7 +9727,10 @@ GenTreeHWIntrinsic* Compiler::fgOptimizeForMaskedIntrinsic(GenTreeHWIntrinsic* n return node; } #elif defined(TARGET_ARM64) - return fgMorphTryUseAllMaskVariant(node); + // TODO-SVE: This optimisation is too naive. It needs to calculate the full cost of the instruction + // vs using the predicate version, taking into account all input arguements and all uses + // of the result. 
+ // return fgMorphTryUseAllMaskVariant(node); #else #error Unsupported platform #endif @@ -9782,7 +9785,7 @@ GenTree* Compiler::doMorphVectorOperandToMask(GenTree* node, GenTreeHWIntrinsic* else if (node->IsVectorZero()) { // Morph the vector of zeroes into mask of zeroes. - GenTree* mask = gtNewSimdFalseMaskByteNode(parent->GetSimdSize()); + GenTree* mask = gtNewSimdFalseMaskByteNode(); mask->SetMorphed(this); return mask; } diff --git a/src/coreclr/jit/simd.h b/src/coreclr/jit/simd.h index d0450fa91caff6..9841bdeb38c93c 100644 --- a/src/coreclr/jit/simd.h +++ b/src/coreclr/jit/simd.h @@ -1526,7 +1526,7 @@ void EvaluateSimdCvtMaskToVector(TSimd* result, simdmask_t arg0) isSet = ((mask >> i) & 1) != 0; #elif defined(TARGET_ARM64) // For Arm64 we have count total bits to read, but - // they are sizeof(TBase) bits apart. We still set + // they are sizeof(TBase) bits apart. We set // the result element to AllBitsSet or Zero depending // on the corresponding mask bit @@ -1598,14 +1598,17 @@ void EvaluateSimdCvtVectorToMask(simdmask_t* result, TSimd arg0) uint32_t count = sizeof(TSimd) / sizeof(TBase); uint64_t mask = 0; - TBase mostSignificantBit = static_cast(1) << ((sizeof(TBase) * 8) - 1); + TBase significantBit = 1; +#if defined(TARGET_XARCH) + significantBit = static_cast(1) << ((sizeof(TBase) * 8) - 1); +#endif for (uint32_t i = 0; i < count; i++) { TBase input0; memcpy(&input0, &arg0.u8[i * sizeof(TBase)], sizeof(TBase)); - if ((input0 & mostSignificantBit) != 0) + if ((input0 & significantBit) != 0) { #if defined(TARGET_XARCH) // For xarch we have count sequential bits to write @@ -1615,9 +1618,9 @@ void EvaluateSimdCvtVectorToMask(simdmask_t* result, TSimd arg0) mask |= static_cast(1) << i; #elif defined(TARGET_ARM64) // For Arm64 we have count total bits to write, but - // they are sizeof(TBase) bits apart. We still set + // they are sizeof(TBase) bits apart. We set // depending on if the corresponding input element - // has its most significant bit set + // has its least significant bit set mask |= static_cast(1) << (i * sizeof(TBase)); #else @@ -1670,6 +1673,328 @@ void EvaluateSimdCvtVectorToMask(var_types baseType, simdmask_t* result, TSimd a } } } + +#if defined(TARGET_ARM64) + +enum SveMaskPattern +{ + SveMaskPatternLargestPowerOf2 = 0, // The largest power of 2. + SveMaskPatternVectorCount1 = 1, // Exactly 1 element. + SveMaskPatternVectorCount2 = 2, // Exactly 2 elements. + SveMaskPatternVectorCount3 = 3, // Exactly 3 elements. + SveMaskPatternVectorCount4 = 4, // Exactly 4 elements. + SveMaskPatternVectorCount5 = 5, // Exactly 5 elements. + SveMaskPatternVectorCount6 = 6, // Exactly 6 elements. + SveMaskPatternVectorCount7 = 7, // Exactly 7 elements. + SveMaskPatternVectorCount8 = 8, // Exactly 8 elements. + SveMaskPatternVectorCount16 = 9, // Exactly 16 elements. + SveMaskPatternVectorCount32 = 10, // Exactly 32 elements. + SveMaskPatternVectorCount64 = 11, // Exactly 64 elements. + SveMaskPatternVectorCount128 = 12, // Exactly 128 elements. + SveMaskPatternVectorCount256 = 13, // Exactly 256 elements. + SveMaskPatternLargestMultipleOf4 = 29, // The largest multiple of 4. + SveMaskPatternLargestMultipleOf3 = 30, // The largest multiple of 3. + SveMaskPatternAll = 31, // All available (implicitly a multiple of two). 
+ SveMaskPatternNone = 14 // Invalid +}; + +template +bool EvaluateSimdPatternToMask(simdmask_t* result, SveMaskPattern pattern) +{ + uint32_t count = sizeof(TSimd) / sizeof(TBase); + uint32_t finalOne = count + 1; + uint64_t mask = 0; + + switch (pattern) + { + case SveMaskPatternLargestPowerOf2: + case SveMaskPatternAll: + finalOne = count; + break; + + case SveMaskPatternVectorCount1: + case SveMaskPatternVectorCount2: + case SveMaskPatternVectorCount3: + case SveMaskPatternVectorCount4: + case SveMaskPatternVectorCount5: + case SveMaskPatternVectorCount6: + case SveMaskPatternVectorCount7: + case SveMaskPatternVectorCount8: + finalOne = pattern - SveMaskPatternVectorCount1 + 1; + break; + + case SveMaskPatternVectorCount16: + case SveMaskPatternVectorCount32: + case SveMaskPatternVectorCount64: + case SveMaskPatternVectorCount128: + case SveMaskPatternVectorCount256: + finalOne = std::min(uint32_t(16 << (pattern - SveMaskPatternVectorCount16)), count); + break; + + case SveMaskPatternLargestMultipleOf4: + finalOne = (count - (count % 4)); + break; + + case SveMaskPatternLargestMultipleOf3: + finalOne = (count - (count % 3)); + break; + + default: + return false; + } + assert(finalOne <= count); + assert(finalOne > 0); + + // Write finalOne number of bits + for (uint32_t i = 0; i < finalOne; i++) + { + mask |= static_cast(1) << (i * sizeof(TBase)); + } + + memcpy(&result->u8[0], &mask, sizeof(uint64_t)); + return true; +} + +template +bool EvaluateSimdPatternToMask(var_types baseType, simdmask_t* result, SveMaskPattern pattern) +{ + switch (baseType) + { + case TYP_FLOAT: + case TYP_INT: + case TYP_UINT: + { + return EvaluateSimdPatternToMask(result, pattern); + } + + case TYP_DOUBLE: + case TYP_LONG: + case TYP_ULONG: + { + return EvaluateSimdPatternToMask(result, pattern); + } + + case TYP_BYTE: + case TYP_UBYTE: + { + return EvaluateSimdPatternToMask(result, pattern); + } + + case TYP_SHORT: + case TYP_USHORT: + { + return EvaluateSimdPatternToMask(result, pattern); + } + + default: + { + unreached(); + } + } +} + +template +bool EvaluateSimdPatternToVector(simd_t* result, SveMaskPattern pattern) +{ + uint32_t count = sizeof(TSimd) / sizeof(TBase); + uint32_t finalOne = count + 1; + + switch (pattern) + { + case SveMaskPatternLargestPowerOf2: + case SveMaskPatternAll: + finalOne = count; + break; + + case SveMaskPatternVectorCount1: + case SveMaskPatternVectorCount2: + case SveMaskPatternVectorCount3: + case SveMaskPatternVectorCount4: + case SveMaskPatternVectorCount5: + case SveMaskPatternVectorCount6: + case SveMaskPatternVectorCount7: + case SveMaskPatternVectorCount8: + finalOne = std::min(uint32_t(pattern - SveMaskPatternVectorCount1 + 1), count); + break; + + case SveMaskPatternVectorCount16: + case SveMaskPatternVectorCount32: + case SveMaskPatternVectorCount64: + case SveMaskPatternVectorCount128: + case SveMaskPatternVectorCount256: + finalOne = std::min(uint32_t(16 << (pattern - SveMaskPatternVectorCount16)), count); + break; + + case SveMaskPatternLargestMultipleOf4: + finalOne = (count - (count % 4)); + break; + + case SveMaskPatternLargestMultipleOf3: + finalOne = (count - (count % 3)); + break; + + default: + return false; + } + assert(finalOne <= count); + assert(finalOne > 0); + + // Write finalOne number of entries + for (uint32_t i = 0; i < count; i++) + { + TBase output; + + if (i < finalOne) + { + memset(&output, 0xFF, sizeof(TBase)); + } + else + { + memset(&output, 0x00, sizeof(TBase)); + } + + memcpy(&result->u8[i * sizeof(TBase)], &output, sizeof(TBase)); + } 
+ + return true; +} + +template +bool EvaluateSimdPatternToVector(var_types baseType, TSimd* result, SveMaskPattern pattern) +{ + switch (baseType) + { + case TYP_FLOAT: + case TYP_INT: + case TYP_UINT: + { + return EvaluateSimdPatternToVector(result, pattern); + } + + case TYP_DOUBLE: + case TYP_LONG: + case TYP_ULONG: + { + return EvaluateSimdPatternToVector(result, pattern); + } + + case TYP_BYTE: + case TYP_UBYTE: + { + return EvaluateSimdPatternToVector(result, pattern); + } + + case TYP_SHORT: + case TYP_USHORT: + { + return EvaluateSimdPatternToVector(result, pattern); + } + + default: + { + unreached(); + } + } +} + +template +SveMaskPattern EvaluateSimdMaskToPattern(simdmask_t arg0) +{ + uint32_t count = sizeof(TSimd) / sizeof(TBase); + + uint64_t mask; + memcpy(&mask, &arg0.u8[0], sizeof(uint64_t)); + uint32_t finalOne = count; + + // A mask pattern starts with zero of more 1s and then the rest of the mask is filled with 0s. + + // Find an unbroken sequence of 1s. + for (uint32_t i = 0; i < count; i++) + { + // For Arm64 we have count total bits to read, but + // they are sizeof(TBase) bits apart. We set + // the result element to AllBitsSet or Zero depending + // on the corresponding mask bit + + bool isSet = ((mask >> (i * sizeof(TBase))) & 1) != 0; + if (!isSet) + { + finalOne = i; + break; + } + } + + // Find an unbroken sequence of 0s. + for (uint32_t i = finalOne; i < count; i++) + { + // For Arm64 we have count total bits to read, but + // they are sizeof(TBase) bits apart. We set + // the result element to AllBitsSet or Zero depending + // on the corresponding mask bit + + bool isSet = ((mask >> (i * sizeof(TBase))) & 1) != 0; + if (isSet) + { + // Invalid sequence + return SveMaskPatternNone; + } + } + + if (finalOne == count) + { + return SveMaskPatternAll; + } + else if (finalOne >= SveMaskPatternVectorCount1 && finalOne <= SveMaskPatternVectorCount8) + { + return (SveMaskPattern)finalOne; + } + else + { + // TODO: Add other patterns as required. These probably won't be seen until we get + // to wider vector lengths. 
+ return SveMaskPatternNone; + } +} + +template +SveMaskPattern EvaluateSimdMaskToPattern(var_types baseType, simdmask_t arg0) +{ + switch (baseType) + { + case TYP_FLOAT: + case TYP_INT: + case TYP_UINT: + { + return EvaluateSimdMaskToPattern(arg0); + } + + case TYP_DOUBLE: + case TYP_LONG: + case TYP_ULONG: + { + return EvaluateSimdMaskToPattern(arg0); + } + + case TYP_BYTE: + case TYP_UBYTE: + { + return EvaluateSimdMaskToPattern(arg0); + } + + case TYP_SHORT: + case TYP_USHORT: + { + return EvaluateSimdMaskToPattern(arg0); + } + + default: + { + unreached(); + } + } +} +#endif // TARGET_ARM64 + #endif // FEATURE_MASKED_HW_INTRINSICS #ifdef FEATURE_SIMD diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveLoadNonFaultingUnOpTest.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveLoadNonFaultingUnOpTest.template index f5364238d58e01..db2416974cdfd4 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveLoadNonFaultingUnOpTest.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveLoadNonFaultingUnOpTest.template @@ -25,7 +25,7 @@ namespace JIT.HardwareIntrinsics.Arm [Fact] public static void {TestName}() { - var test = new LoadUnaryOpTest__{TestName}(); + var test = new LoadNonFaultingUnaryOpTest__{TestName}(); if (test.IsSupported) { @@ -66,7 +66,7 @@ namespace JIT.HardwareIntrinsics.Arm } } - public sealed unsafe class LoadUnaryOpTest__{TestName} + public sealed unsafe class LoadNonFaultingUnaryOpTest__{TestName} { private struct DataTable { @@ -134,7 +134,7 @@ namespace JIT.HardwareIntrinsics.Arm return testStruct; } - public void RunStructFldScenario(LoadUnaryOpTest__{TestName} testClass) + public void RunStructFldScenario(LoadNonFaultingUnaryOpTest__{TestName} testClass) { var result = {Isa}.{Method}(({Op1BaseType}*)testClass._dataTable.inArray1Ptr); @@ -158,7 +158,7 @@ namespace JIT.HardwareIntrinsics.Arm private DataTable _dataTable; - public LoadUnaryOpTest__{TestName}() + public LoadNonFaultingUnaryOpTest__{TestName}() { Succeeded = true; diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveLoadVectorMaskedTest.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveLoadVectorMaskedTest.template index 6bec8d9481000a..829f9384c33610 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveLoadVectorMaskedTest.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/SveLoadVectorMaskedTest.template @@ -24,7 +24,7 @@ namespace JIT.HardwareIntrinsics.Arm [Fact] public static void {TestName}() { - var test = new LoadUnaryOpTest__{TestName}(); + var test = new LoadVectorMaskTest__{TestName}(); if (test.IsSupported) { @@ -56,7 +56,7 @@ namespace JIT.HardwareIntrinsics.Arm } } - public sealed unsafe class LoadUnaryOpTest__{TestName} + public sealed unsafe class LoadVectorMaskTest__{TestName} { private struct DataTable { @@ -121,7 +121,7 @@ namespace JIT.HardwareIntrinsics.Arm return testStruct; } - public void RunStructFldScenario(LoadUnaryOpTest__{TestName} testClass) + public void RunStructFldScenario(LoadVectorMaskTest__{TestName} testClass) { {Op1VectorType}<{Op1BaseType}> loadMask = Sve.CreateTrueMask{RetBaseType}(SveMaskPattern.All); @@ -148,7 +148,7 @@ namespace JIT.HardwareIntrinsics.Arm private DataTable _dataTable; - public LoadUnaryOpTest__{TestName}() + public LoadVectorMaskTest__{TestName}() { Succeeded = true; diff --git a/src/tests/JIT/opt/SVE/ConstantMasks.cs b/src/tests/JIT/opt/SVE/ConstantMasks.cs new file mode 100644 index 00000000000000..078e60e9b55411 --- /dev/null +++ b/src/tests/JIT/opt/SVE/ConstantMasks.cs @@ -0,0 +1,232 @@ 
+// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +// Unit tests for the masks conversion optimization +// Uses vectors as masks and vice versa. + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using System.Threading; +using Xunit; + +public class ConstantMasks +{ + [MethodImpl(MethodImplOptions.NoInlining)] + private static void Consume(T value) { } + + [Fact] + public static void TestEntryPoint() + { + if (Sve.IsSupported) + { + Vector op1 = Vector.Create(11); + Vector op2 = Vector.Create(22); + Vector op3 = Vector.Create(33); + Vector opl1 = Vector.Create(44); + Vector opl2 = Vector.Create(55); + + CndSelectEmbedded(op1, op2, op3); + CndSelectEmbeddedFalseMask(op1, op2); + CndSelectEmbeddedZero(op1, op2); + CndSelectEmbeddedTrueMask(op1, op2); + CndSelectEmbeddedAllBits(op1, op2); + + CndSelectOptionalEmbedded(op1, op2, op3); + CndSelectOptionalEmbeddedFalseMask(op1, op2); + CndSelectOptionalEmbeddedZero(op1, op2); + CndSelectOptionalEmbeddedTrueMask(op1, op2); + CndSelectOptionalEmbeddedAllBits(op1, op2); + + CndSelectEmbeddedOneOp(op1, op2); + CndSelectEmbeddedOneOpFalseMask(op1, op2); + CndSelectEmbeddedOneOpZero(op1, op2); + CndSelectEmbeddedOneOpTrueMask(op1); + CndSelectEmbeddedOneOpAllBits(op1); + + CndSelectEmbeddedReduction(opl1, op2, opl2); + CndSelectEmbeddedReductionFalseMask(op1, opl1); + CndSelectEmbeddedReductionZero(op1, opl1); + CndSelectEmbeddedReductionTrueMask(op1, opl1); + CndSelectEmbeddedReductionAllBits(op1, opl1); + } + } + + // SVE operation (with embedded mask) inside a conditional select + + [MethodImpl(MethodImplOptions.NoInlining)] + static void CndSelectEmbedded(Vector mask, Vector op1, Vector op2) { + //ARM64-FULL-LINE: sabd {{z[0-9]+}}.s, {{p[0-9]+}}/m, {{z[0-9]+}}.s, {{z[0-9]+}}.s + //ARM64-FULL-LINE-NEXT: movz {{.*}} + Vector result = Sve.ConditionalSelect(mask, Sve.AbsoluteDifference(op1, op2), op1); + Consume(result); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static void CndSelectEmbeddedFalseMask(Vector op1, Vector op2) { + //ARM64-FULL-LINE: mov v0.16b, v1.16b + Vector result = Sve.ConditionalSelect(Sve.CreateFalseMaskInt32(), Sve.AbsoluteDifference(op1, op2), op2); + Consume(result); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static void CndSelectEmbeddedZero(Vector op1, Vector op2) { + //ARM64-FULL-LINE: mov v0.16b, v1.16b + Vector result = Sve.ConditionalSelect(Vector.Zero, Sve.AbsoluteDifference(op1, op2), op2); + Consume(result); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static void CndSelectEmbeddedTrueMask(Vector op1, Vector op2) { + //ARM64-FULL-LINE: ptrue {{p[0-9]+}}.s + //ARM64-FULL-LINE-NEXT: movprfx {{z[0-9]+}}.s, {{p[0-9]+}}/z, {{z[0-9]+}}.s + //ARM64-FULL-LINE-NEXT: sabd {{z[0-9]+}}.s, {{p[0-9]+}}/m, {{z[0-9]+}}.s, {{z[0-9]+}}.s + //ARM64-FULL-LINE-NEXT: movz {{.*}} + Vector result = Sve.ConditionalSelect(Sve.CreateTrueMaskInt32(), Sve.AbsoluteDifference(op1, op2), op1); + Consume(result); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static void CndSelectEmbeddedAllBits(Vector op1, Vector op2) { + //ARM64-FULL-LINE: ptrue {{p[0-9]+}}.s + //ARM64-FULL-LINE-NEXT: movprfx {{z[0-9]+}}.s, {{p[0-9]+}}/z, {{z[0-9]+}}.s + //ARM64-FULL-LINE-NEXT: sabd {{z[0-9]+}}.s, {{p[0-9]+}}/m, {{z[0-9]+}}.s, {{z[0-9]+}}.s + //ARM64-FULL-LINE-NEXT: movz {{.*}} + Vector result = 
Sve.ConditionalSelect(Vector.AllBitsSet, Sve.AbsoluteDifference(op1, op2), op1); + Consume(result); + } + + + // SVE operation (with optional embedded mask) inside a conditional select + + [MethodImpl(MethodImplOptions.NoInlining)] + static void CndSelectOptionalEmbedded(Vector mask, Vector op1, Vector op2) { + //ARM64-FULL-LINE: add {{z[0-9]+}}.s, {{p[0-9]+}}/m, {{z[0-9]+}}.s, {{z[0-9]+}}.s + //ARM64-FULL-LINE-NEXT: movz {{.*}} + Vector result = Sve.ConditionalSelect(mask, Sve.Add(op1, op2), op1); + Consume(result); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static void CndSelectOptionalEmbeddedFalseMask(Vector op1, Vector op2) { + //ARM64-FULL-LINE: mov v0.16b, v1.16b + Vector result = Sve.ConditionalSelect(Sve.CreateFalseMaskInt32(), Sve.Add(op1, op2), op2); + Consume(result); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static void CndSelectOptionalEmbeddedZero(Vector op1, Vector op2) { + //ARM64-FULL-LINE: mov v0.16b, v1.16b + Vector result = Sve.ConditionalSelect(Vector.Zero, Sve.Add(op1, op2), op2); + Consume(result); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static void CndSelectOptionalEmbeddedTrueMask(Vector op1, Vector op2) { + //ARM64-FULL-LINE: add {{z[0-9]+}}.s, {{z[0-9]+}}.s, {{z[0-9]+}}.s + //ARM64-FULL-LINE-NEXT: movz {{.*}} + Vector result = Sve.ConditionalSelect(Sve.CreateTrueMaskInt32(), Sve.Add(op1, op2), op1); + Consume(result); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static void CndSelectOptionalEmbeddedAllBits(Vector op1, Vector op2) { + //ARM64-FULL-LINE: add {{z[0-9]+}}.s, {{z[0-9]+}}.s, {{z[0-9]+}}.s + //ARM64-FULL-LINE-NEXT: movz {{.*}} + Vector result = Sve.ConditionalSelect(Vector.AllBitsSet, Sve.Add(op1, op2), op1); + Consume(result); + } + + + // SVE one op operation (with embedded mask) inside a conditional select + + [MethodImpl(MethodImplOptions.NoInlining)] + static void CndSelectEmbeddedOneOp(Vector mask, Vector op1) { + //ARM64-FULL-LINE: abs {{z[0-9]+}}.s, {{p[0-9]+}}/m, {{z[0-9]+}}.s + //ARM64-FULL-LINE-NEXT: movz {{.*}} + Vector result = Sve.ConditionalSelect(mask, Sve.Abs(op1), op1); + Consume(result); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static void CndSelectEmbeddedOneOpFalseMask(Vector dummy, Vector op1) { + //ARM64-FULL-LINE: mov v0.16b, v1.16b + Vector result = Sve.ConditionalSelect(Sve.CreateFalseMaskInt32(), Sve.Abs(op1), op1); + Consume(result); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static void CndSelectEmbeddedOneOpZero(Vector dummy, Vector op1) { + //ARM64-FULL-LINE: mov v0.16b, v1.16b + Vector result = Sve.ConditionalSelect(Vector.Zero, Sve.Abs(op1), op1); + Consume(result); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static void CndSelectEmbeddedOneOpTrueMask(Vector op1) { + //ARM64-FULL-LINE: ptrue {{p[0-9]+}}.s + //ARM64-FULL-LINE: abs {{z[0-9]+}}.s, {{p[0-9]+}}/m, {{z[0-9]+}}.s + //ARM64-FULL-LINE-NEXT: movz {{.*}} + Vector result = Sve.ConditionalSelect(Sve.CreateTrueMaskInt32(), Sve.Abs(op1), op1); + Consume(result); + } + + [MethodImpl(MethodImplOptions.NoInlining)] + static void CndSelectEmbeddedOneOpAllBits(Vector op1) { + //ARM64-FULL-LINE: ptrue {{p[0-9]+}}.s + //ARM64-FULL-LINE: abs {{z[0-9]+}}.s, {{p[0-9]+}}/m, {{z[0-9]+}}.s + //ARM64-FULL-LINE-NEXT: movz {{.*}} + Vector result = Sve.ConditionalSelect(Vector.AllBitsSet, Sve.Abs(op1), op1); + Consume(result); + } + + + // SVE reduction operation (with embedded mask) inside a conditional select. + // The op and conditional select cannot be combined into one instruction. 
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void CndSelectEmbeddedReduction(Vector<long> mask, Vector<int> op1, Vector<long> opf) {
+        //ARM64-FULL-LINE: cmpne {{p[0-9]+}}.d, {{p[0-9]+}}/z, {{z[0-9]+}}.d, #0
+        //ARM64-FULL-LINE-NEXT: ptrue {{p[0-9]+}}.s
+        //ARM64-FULL-LINE-NEXT: saddv {{d[0-9]+}}, {{p[0-9]+}}, {{z[0-9]+}}.s
+        //ARM64-FULL-LINE-NEXT: sel {{z[0-9]+}}.d, {{p[0-9]+}}, {{z[0-9]+}}.d, {{z[0-9]+}}.d
+        Vector<long> result = Sve.ConditionalSelect(mask, Sve.AddAcross(op1), opf);
+        Consume(result);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void CndSelectEmbeddedReductionFalseMask(Vector<int> op1, Vector<long> opf) {
+        //ARM64-FULL-LINE: mov v0.16b, v1.16b
+        Vector<long> result = Sve.ConditionalSelect(Sve.CreateFalseMaskInt64(), Sve.AddAcross(op1), opf);
+        Consume(result);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void CndSelectEmbeddedReductionZero(Vector<int> op1, Vector<long> opf) {
+        //ARM64-FULL-LINE: mov v0.16b, v1.16b
+        Vector<long> result = Sve.ConditionalSelect(Vector<long>.Zero, Sve.AddAcross(op1), opf);
+        Consume(result);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void CndSelectEmbeddedReductionTrueMask(Vector<int> op1, Vector<long> opf) {
+        //ARM64-FULL-LINE: ptrue {{p[0-9]+}}.s
+        //ARM64-FULL-LINE-NEXT: saddv {{d[0-9]+}}, {{p[0-9]+}}, {{z[0-9]+}}.s
+        //ARM64-FULL-LINE-NEXT: movz {{.*}}
+        Vector<long> result = Sve.ConditionalSelect(Sve.CreateTrueMaskInt64(), Sve.AddAcross(op1), opf);
+        Consume(result);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void CndSelectEmbeddedReductionAllBits(Vector<int> op1, Vector<long> opf) {
+        //ARM64-FULL-LINE: ptrue {{p[0-9]+}}.s
+        //ARM64-FULL-LINE-NEXT: saddv {{d[0-9]+}}, {{p[0-9]+}}, {{z[0-9]+}}.s
+        //ARM64-FULL-LINE-NEXT: movz {{.*}}
+        Vector<long> result = Sve.ConditionalSelect(Vector<long>.AllBitsSet, Sve.AddAcross(op1), opf);
+        Consume(result);
+    }
+
+}
diff --git a/src/tests/JIT/opt/SVE/ConstantMasks.csproj b/src/tests/JIT/opt/SVE/ConstantMasks.csproj
new file mode 100644
index 00000000000000..5482afbaa21aa8
--- /dev/null
+++ b/src/tests/JIT/opt/SVE/ConstantMasks.csproj
@@ -0,0 +1,19 @@
+ + + true + + + None + True + $(NoWarn),SYSLIB5003 + + + + true + + + + + +
diff --git a/src/tests/JIT/opt/SVE/ConstantMasksOp2Fixed.cs b/src/tests/JIT/opt/SVE/ConstantMasksOp2Fixed.cs
new file mode 100644
index 00000000000000..ba23ebe08f07c9
--- /dev/null
+++ b/src/tests/JIT/opt/SVE/ConstantMasksOp2Fixed.cs
@@ -0,0 +1,309 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+// Unit tests for the masks conversion optimization
+// Uses vectors as masks and vice versa.
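+// In this variant the third operand (op3) of each ConditionalSelect is fixed to either a
+// false mask or a zero vector (the F and Z method suffixes respectively).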
+
+using System;
+using System.Numerics;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.Arm;
+using System.Threading;
+using Xunit;
+
+public class ConstantMasks
+{
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static void Consume<T>(T value) { }
+
+    [Fact]
+    public static void TestEntryPoint()
+    {
+        if (Sve.IsSupported)
+        {
+            Vector<int> op1 = Vector.Create<int>(11);
+            Vector<int> op2 = Vector.Create<int>(22);
+            Vector<int> op3 = Vector.Create<int>(33);
+            Vector<long> opl1 = Vector.Create<long>(44);
+            Vector<long> opl2 = Vector.Create<long>(55);
+
+            CndSelectEmbeddedF(op1, op2, op3);
+            CndSelectEmbeddedZ(op1, op2, op3);
+            CndSelectEmbeddedFalseMaskF(op1, op2);
+            CndSelectEmbeddedFalseMaskZ(op1, op2);
+            CndSelectEmbeddedZeroF(op1, op2);
+            CndSelectEmbeddedZeroZ(op1, op2);
+            CndSelectEmbeddedTrueMaskF(op1, op2);
+            CndSelectEmbeddedTrueMaskZ(op1, op2);
+            CndSelectEmbeddedAllBitsF(op1, op2);
+            CndSelectEmbeddedAllBitsZ(op1, op2);
+
+            CndSelectOptionalEmbeddedF(op1, op2, op3);
+            CndSelectOptionalEmbeddedZ(op1, op2, op3);
+            CndSelectOptionalEmbeddedFalseMaskF(op1, op2);
+            CndSelectOptionalEmbeddedFalseMaskZ(op1, op2);
+            CndSelectOptionalEmbeddedZeroF(op1, op2);
+            CndSelectOptionalEmbeddedZeroZ(op1, op2);
+            CndSelectOptionalEmbeddedTrueMaskF(op1, op2);
+            CndSelectOptionalEmbeddedTrueMaskZ(op1, op2);
+            CndSelectOptionalEmbeddedAllBitsF(op1, op2);
+            CndSelectOptionalEmbeddedAllBitsZ(op1, op2);
+
+            CndSelectEmbeddedReductionF(opl1, op2);
+            CndSelectEmbeddedReductionZ(opl1, op2);
+            CndSelectEmbeddedReductionFalseMaskF(op1);
+            CndSelectEmbeddedReductionFalseMaskZ(op1);
+            CndSelectEmbeddedReductionZeroF(op1);
+            CndSelectEmbeddedReductionZeroZ(op1);
+            CndSelectEmbeddedReductionTrueMaskF(op1);
+            CndSelectEmbeddedReductionTrueMaskZ(op1);
+            CndSelectEmbeddedReductionAllBitsF(op1);
+            CndSelectEmbeddedReductionAllBitsZ(op1);
+        }
+    }
+
+    // SVE operation (with embedded mask) inside a conditional select
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void CndSelectEmbeddedF(Vector<int> mask, Vector<int> op1, Vector<int> op2) {
+        //ARM64-FULL-LINE: sabd {{z[0-9]+}}.s, {{p[0-9]+}}/m, {{z[0-9]+}}.s, {{z[0-9]+}}.s
+        var result = Sve.ConditionalSelect(mask, Sve.AbsoluteDifference(op1, op2), Sve.CreateFalseMaskInt32());
+        Consume(result);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void CndSelectEmbeddedZ(Vector<int> mask, Vector<int> op1, Vector<int> op2) {
+        //ARM64-FULL-LINE: sabd {{z[0-9]+}}.s, {{p[0-9]+}}/m, {{z[0-9]+}}.s, {{z[0-9]+}}.s
+        var result = Sve.ConditionalSelect(mask, Sve.AbsoluteDifference(op1, op2), Vector<int>.Zero);
+        Consume(result);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void CndSelectEmbeddedFalseMaskF(Vector<int> op1, Vector<int> op2) {
+        //ARM64-FULL-LINE: movi {{v[0-9]+}}.4s, #0
+        var result = Sve.ConditionalSelect(Sve.CreateFalseMaskInt32(), Sve.AbsoluteDifference(op1, op2), Sve.CreateFalseMaskInt32());
+        Consume(result);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void CndSelectEmbeddedFalseMaskZ(Vector<int> op1, Vector<int> op2) {
+        //ARM64-FULL-LINE: movi {{v[0-9]+}}.4s, #0
+        var result = Sve.ConditionalSelect(Sve.CreateFalseMaskInt32(), Sve.AbsoluteDifference(op1, op2), Vector<int>.Zero);
+        Consume(result);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void CndSelectEmbeddedZeroF(Vector<int> op1, Vector<int> op2) {
+        //ARM64-FULL-LINE: movi {{v[0-9]+}}.4s, #0
+        var result = Sve.ConditionalSelect(Vector<int>.Zero, Sve.AbsoluteDifference(op1, op2), Sve.CreateFalseMaskInt32());
+        Consume(result);
+    }
+
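+    // As above: a governing mask that is known to be all false selects op3 directly, so
+    // only the movi materialising the zero result is expected to remain.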
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void CndSelectEmbeddedZeroZ(Vector<int> op1, Vector<int> op2) {
+        //ARM64-FULL-LINE: movi {{v[0-9]+}}.4s, #0
+        var result = Sve.ConditionalSelect(Vector<int>.Zero, Sve.AbsoluteDifference(op1, op2), Vector<int>.Zero);
+        Consume(result);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void CndSelectEmbeddedTrueMaskF(Vector<int> op1, Vector<int> op2) {
+        //ARM64-FULL-LINE: ptrue {{p[0-9]+}}.s
+        //ARM64-FULL-LINE-NEXT: sabd {{z[0-9]+}}.s, {{p[0-9]+}}/m, {{z[0-9]+}}.s, {{z[0-9]+}}.s
+        var result = Sve.ConditionalSelect(Sve.CreateTrueMaskInt32(), Sve.AbsoluteDifference(op1, op2), Sve.CreateFalseMaskInt32());
+        Consume(result);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void CndSelectEmbeddedTrueMaskZ(Vector<int> op1, Vector<int> op2) {
+        //ARM64-FULL-LINE: ptrue {{p[0-9]+}}.s
+        //ARM64-FULL-LINE-NEXT: movprfx {{z[0-9]+}}.s, {{p[0-9]+}}/m, {{z[0-9]+}}.s
+        //ARM64-FULL-LINE-NEXT: sabd {{z[0-9]+}}.s, {{p[0-9]+}}/m, {{z[0-9]+}}.s, {{z[0-9]+}}.s
+        var result = Sve.ConditionalSelect(Sve.CreateTrueMaskInt32(), Sve.AbsoluteDifference(op1, op2), Vector<int>.Zero);
+        Consume(result);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void CndSelectEmbeddedAllBitsF(Vector<int> op1, Vector<int> op2) {
+        //ARM64-FULL-LINE: mvni {{v[0-9]+}}.4s, #0
+        //ARM64-FULL-LINE-NEXT: cmpne {{p[0-9]+}}.s, {{p[0-9]+}}/z, {{z[0-9]+}}.s, #0
+        //ARM64-FULL-LINE-NEXT: movprfx {{z[0-9]+}}.s, {{p[0-9]+}}/m, {{z[0-9]+}}.s
+        //ARM64-FULL-LINE-NEXT: sabd {{z[0-9]+}}.s, {{p[0-9]+}}/m, {{z[0-9]+}}.s, {{z[0-9]+}}.s
+        var result = Sve.ConditionalSelect(Vector<int>.AllBitsSet, Sve.AbsoluteDifference(op1, op2), Sve.CreateFalseMaskInt32());
+        Consume(result);
+    }
+
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void CndSelectEmbeddedAllBitsZ(Vector<int> op1, Vector<int> op2) {
+        //ARM64-FULL-LINE: mvni {{v[0-9]+}}.4s, #0
+        //ARM64-FULL-LINE-NEXT: cmpne {{p[0-9]+}}.s, {{p[0-9]+}}/z, {{z[0-9]+}}.s, #0
+        //ARM64-FULL-LINE-NEXT: sabd {{z[0-9]+}}.s, {{p[0-9]+}}/m, {{z[0-9]+}}.s, {{z[0-9]+}}.s
+        var result = Sve.ConditionalSelect(Vector<int>.AllBitsSet, Sve.AbsoluteDifference(op1, op2), Vector<int>.Zero);
+        Consume(result);
+    }
+
+    // SVE operation (with optional embedded mask) inside a conditional select
+
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void CndSelectOptionalEmbeddedF(Vector<int> mask, Vector<int> op1, Vector<int> op2) {
+        //ARM64-FULL-LINE: add {{z[0-9]+}}.s, {{p[0-9]+}}/m, {{z[0-9]+}}.s, {{z[0-9]+}}.s
+        var result = Sve.ConditionalSelect(mask, Sve.Add(op1, op2), Sve.CreateFalseMaskInt32());
+        Consume(result);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void CndSelectOptionalEmbeddedZ(Vector<int> mask, Vector<int> op1, Vector<int> op2) {
+        //ARM64-FULL-LINE: add {{z[0-9]+}}.s, {{p[0-9]+}}/m, {{z[0-9]+}}.s, {{z[0-9]+}}.s
+        var result = Sve.ConditionalSelect(mask, Sve.Add(op1, op2), Vector<int>.Zero);
+        Consume(result);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void CndSelectOptionalEmbeddedFalseMaskF(Vector<int> op1, Vector<int> op2) {
+        //ARM64-FULL-LINE: movi {{v[0-9]+}}.4s, #0
+        var result = Sve.ConditionalSelect(Sve.CreateFalseMaskInt32(), Sve.Add(op1, op2), Sve.CreateFalseMaskInt32());
+        Consume(result);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void CndSelectOptionalEmbeddedFalseMaskZ(Vector<int> op1, Vector<int> op2) {
+        //ARM64-FULL-LINE: movi {{v[0-9]+}}.4s, #0
+        var result = Sve.ConditionalSelect(Sve.CreateFalseMaskInt32(), Sve.Add(op1, op2), Vector<int>.Zero);
+        Consume(result);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void CndSelectOptionalEmbeddedZeroF(Vector<int> op1, Vector<int> op2) {
+        //ARM64-FULL-LINE: movi {{v[0-9]+}}.4s, #0
+        var result = Sve.ConditionalSelect(Vector<int>.Zero, Sve.Add(op1, op2), Sve.CreateFalseMaskInt32());
+        Consume(result);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void CndSelectOptionalEmbeddedZeroZ(Vector<int> op1, Vector<int> op2) {
+        //ARM64-FULL-LINE: movi {{v[0-9]+}}.4s, #0
+        var result = Sve.ConditionalSelect(Vector<int>.Zero, Sve.Add(op1, op2), Vector<int>.Zero);
+        Consume(result);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void CndSelectOptionalEmbeddedTrueMaskF(Vector<int> op1, Vector<int> op2) {
+        //ARM64-FULL-LINE: add {{z[0-9]+}}.s, {{z[0-9]+}}.s, {{z[0-9]+}}.s
+        Vector<int> result = Sve.ConditionalSelect(Sve.CreateTrueMaskInt32(), Sve.Add(op1, op2), Sve.CreateFalseMaskInt32());
+        Consume(result);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void CndSelectOptionalEmbeddedTrueMaskZ(Vector<int> op1, Vector<int> op2) {
+        //ARM64-FULL-LINE: add {{z[0-9]+}}.s, {{z[0-9]+}}.s, {{z[0-9]+}}.s
+        Vector<int> result = Sve.ConditionalSelect(Sve.CreateTrueMaskInt32(), Sve.Add(op1, op2), Vector<int>.Zero);
+        Consume(result);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void CndSelectOptionalEmbeddedAllBitsF(Vector<int> op1, Vector<int> op2) {
+        //ARM64-FULL-LINE: add {{z[0-9]+}}.s, {{z[0-9]+}}.s, {{z[0-9]+}}.s
+        var result = Sve.ConditionalSelect(Vector<int>.AllBitsSet, Sve.Add(op1, op2), Sve.CreateFalseMaskInt32());
+        Consume(result);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void CndSelectOptionalEmbeddedAllBitsZ(Vector<int> op1, Vector<int> op2) {
+        //ARM64-FULL-LINE: add {{z[0-9]+}}.s, {{z[0-9]+}}.s, {{z[0-9]+}}.s
+        var result = Sve.ConditionalSelect(Vector<int>.AllBitsSet, Sve.Add(op1, op2), Vector<int>.Zero);
+        Consume(result);
+    }
+
+    // SVE reduction operation (with embedded mask) inside a conditional select.
+    // The op and conditional select cannot be combined into one instruction.
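+    // With a constant governing mask the select itself should fold away: an all-true mask
+    // leaves just the reduction (ptrue + saddv), while a false or zero mask leaves only the
+    // zero third operand.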
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void CndSelectEmbeddedReductionF(Vector<long> mask, Vector<int> op1) {
+        //ARM64-FULL-LINE: cmpne {{p[0-9]+}}.d, {{p[0-9]+}}/z, {{z[0-9]+}}.d, #0
+        //ARM64-FULL-LINE: ptrue {{p[0-9]+}}.s
+        //ARM64-FULL-LINE-NEXT: saddv {{d[0-9]+}}, {{p[0-9]+}}, {{z[0-9]+}}.s
+        //ARM64-FULL-LINE-NEXT: movi {{v[0-9]+}}.4s, #0
+        //ARM64-FULL-LINE-NEXT: sel {{z[0-9]+}}.d, {{p[0-9]+}}, {{z[0-9]+}}.d, {{z[0-9]+}}.d
+        Vector<long> result = Sve.ConditionalSelect(mask, Sve.AddAcross(op1), Sve.CreateFalseMaskInt64());
+        Consume(result);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void CndSelectEmbeddedReductionZ(Vector<long> mask, Vector<int> op1) {
+        //ARM64-FULL-LINE: cmpne {{p[0-9]+}}.d, {{p[0-9]+}}/z, {{z[0-9]+}}.d, #0
+        //ARM64-FULL-LINE-NEXT: ptrue {{p[0-9]+}}.s
+        //ARM64-FULL-LINE-NEXT: saddv {{d[0-9]+}}, {{p[0-9]+}}, {{z[0-9]+}}.s
+        //ARM64-FULL-LINE-NEXT: movi {{v[0-9]+}}.4s, #0
+        //ARM64-FULL-LINE-NEXT: sel {{z[0-9]+}}.d, {{p[0-9]+}}, {{z[0-9]+}}.d, {{z[0-9]+}}.d
+        Vector<long> result = Sve.ConditionalSelect(mask, Sve.AddAcross(op1), Vector<long>.Zero);
+        Consume(result);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void CndSelectEmbeddedReductionFalseMaskF(Vector<int> op1) {
+        //ARM64-FULL-LINE: movi v0.4s, #0
+        Vector<long> result = Sve.ConditionalSelect(Sve.CreateFalseMaskInt64(), Sve.AddAcross(op1), Sve.CreateFalseMaskInt64());
+        Consume(result);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void CndSelectEmbeddedReductionFalseMaskZ(Vector<int> op1) {
+        //ARM64-FULL-LINE: movi v0.4s, #0
+        Vector<long> result = Sve.ConditionalSelect(Sve.CreateFalseMaskInt64(), Sve.AddAcross(op1), Vector<long>.Zero);
+        Consume(result);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void CndSelectEmbeddedReductionZeroF(Vector<int> op1) {
+        //ARM64-FULL-LINE: movi v0.4s, #0
+        Vector<long> result = Sve.ConditionalSelect(Vector<long>.Zero, Sve.AddAcross(op1), Sve.CreateFalseMaskInt64());
+        Consume(result);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void CndSelectEmbeddedReductionZeroZ(Vector<int> op1) {
+        //ARM64-FULL-LINE: movi v0.4s, #0
+        Vector<long> result = Sve.ConditionalSelect(Vector<long>.Zero, Sve.AddAcross(op1), Vector<long>.Zero);
+        Consume(result);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void CndSelectEmbeddedReductionTrueMaskF(Vector<int> op1) {
+        //ARM64-FULL-LINE: ptrue {{p[0-9]+}}.s
+        //ARM64-FULL-LINE-NEXT: saddv {{d[0-9]+}}, {{p[0-9]+}}, {{z[0-9]+}}.s
+        //ARM64-FULL-LINE-NEXT: movz {{.*}}
+        Vector<long> result = Sve.ConditionalSelect(Sve.CreateTrueMaskInt64(), Sve.AddAcross(op1), Sve.CreateFalseMaskInt64());
+        Consume(result);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void CndSelectEmbeddedReductionTrueMaskZ(Vector<int> op1) {
+        //ARM64-FULL-LINE: ptrue {{p[0-9]+}}.s
+        //ARM64-FULL-LINE-NEXT: saddv {{d[0-9]+}}, {{p[0-9]+}}, {{z[0-9]+}}.s
+        //ARM64-FULL-LINE-NEXT: movz {{.*}}
+        Vector<long> result = Sve.ConditionalSelect(Sve.CreateTrueMaskInt64(), Sve.AddAcross(op1), Vector<long>.Zero);
+        Consume(result);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void CndSelectEmbeddedReductionAllBitsF(Vector<int> op1) {
+        //ARM64-FULL-LINE: ptrue {{p[0-9]+}}.s
+        //ARM64-FULL-LINE-NEXT: saddv {{d[0-9]+}}, {{p[0-9]+}}, {{z[0-9]+}}.s
+        //ARM64-FULL-LINE-NEXT: movz {{.*}}
+        Vector<long> result = Sve.ConditionalSelect(Vector<long>.AllBitsSet, Sve.AddAcross(op1), Sve.CreateFalseMaskInt64());
+        Consume(result);
+    }
+
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    static void CndSelectEmbeddedReductionAllBitsZ(Vector<int> op1) {
+        //ARM64-FULL-LINE: ptrue {{p[0-9]+}}.s
+        //ARM64-FULL-LINE-NEXT: saddv {{d[0-9]+}}, {{p[0-9]+}}, {{z[0-9]+}}.s
+        //ARM64-FULL-LINE-NEXT: movz {{.*}}
+        Vector<long> result = Sve.ConditionalSelect(Vector<long>.AllBitsSet, Sve.AddAcross(op1), Vector<long>.Zero);
+        Consume(result);
+    }
+}
diff --git a/src/tests/JIT/opt/SVE/ConstantMasksOp2Fixed.csproj b/src/tests/JIT/opt/SVE/ConstantMasksOp2Fixed.csproj
new file mode 100644
index 00000000000000..5482afbaa21aa8
--- /dev/null
+++ b/src/tests/JIT/opt/SVE/ConstantMasksOp2Fixed.csproj
@@ -0,0 +1,19 @@
+ + + true + + + None + True + $(NoWarn),SYSLIB5003 + + + + true + + + + + +
diff --git a/src/tests/JIT/opt/SVE/PredicateInstructions.cs b/src/tests/JIT/opt/SVE/PredicateInstructions.cs
index 41b09c1fad3898..b1336674f1638b 100644
--- a/src/tests/JIT/opt/SVE/PredicateInstructions.cs
+++ b/src/tests/JIT/opt/SVE/PredicateInstructions.cs
@@ -35,56 +35,48 @@ public static void TestPredicateInstructions()
     [MethodImpl(MethodImplOptions.NoInlining)]
     static Vector<short> ZipLow()
     {
-        //ARM64-FULL-LINE: zip1 {{p[0-9]+}}.h, {{p[0-9]+}}.h, {{p[0-9]+}}.h
         return Sve.ZipLow(Vector<short>.Zero, Sve.CreateTrueMaskInt16());
     }
 
     [MethodImpl(MethodImplOptions.NoInlining)]
     static Vector<uint> ZipHigh()
     {
-        //ARM64-FULL-LINE: zip2 {{p[0-9]+}}.s, {{p[0-9]+}}.s, {{p[0-9]+}}.s
         return Sve.ZipHigh(Sve.CreateTrueMaskUInt32(), Sve.CreateTrueMaskUInt32());
     }
 
     [MethodImpl(MethodImplOptions.NoInlining)]
     static Vector<sbyte> UnzipEven()
     {
-        //ARM64-FULL-LINE: uzp1 {{p[0-9]+}}.b, {{p[0-9]+}}.b, {{p[0-9]+}}.b
         return Sve.UnzipEven(Sve.CreateTrueMaskSByte(), Vector<sbyte>.Zero);
     }
 
     [MethodImpl(MethodImplOptions.NoInlining)]
     static Vector<short> UnzipOdd()
     {
-        //ARM64-FULL-LINE: uzp2 {{p[0-9]+}}.h, {{p[0-9]+}}.h, {{p[0-9]+}}.h
         return Sve.UnzipOdd(Sve.CreateTrueMaskInt16(), Sve.CreateFalseMaskInt16());
     }
 
     [MethodImpl(MethodImplOptions.NoInlining)]
     static Vector<long> TransposeEven()
     {
-        //ARM64-FULL-LINE: trn1 {{p[0-9]+}}.d, {{p[0-9]+}}.d, {{p[0-9]+}}.d
         return Sve.TransposeEven(Sve.CreateFalseMaskInt64(), Sve.CreateTrueMaskInt64());
     }
 
     [MethodImpl(MethodImplOptions.NoInlining)]
     static Vector<short> TransposeOdd()
     {
-        //ARM64-FULL-LINE: trn2 {{p[0-9]+}}.h, {{p[0-9]+}}.h, {{p[0-9]+}}.h
         return Sve.TransposeOdd(Vector<short>.Zero, Sve.CreateTrueMaskInt16());
     }
 
     [MethodImpl(MethodImplOptions.NoInlining)]
     static Vector<short> ReverseElement()
     {
-        //ARM64-FULL-LINE: rev {{p[0-9]+}}.h, {{p[0-9]+}}.h
         return Sve.ReverseElement(Sve.CreateTrueMaskInt16());
     }
 
     [MethodImpl(MethodImplOptions.NoInlining)]
     static Vector<short> And()
     {
-        //ARM64-FULL-LINE: and {{p[0-9]+}}.b, {{p[0-9]+}}/z, {{p[0-9]+}}.b, {{p[0-9]+}}.b
         return Sve.ConditionalSelect(
             Sve.CreateTrueMaskInt16(),
             Sve.And(Sve.CreateTrueMaskInt16(), Sve.CreateTrueMaskInt16()),
@@ -95,7 +87,6 @@ static Vector<short> And()
     [MethodImpl(MethodImplOptions.NoInlining)]
     static Vector<short> BitwiseClear()
     {
-        //ARM64-FULL-LINE: bic {{p[0-9]+}}.b, {{p[0-9]+}}/z, {{p[0-9]+}}.b, {{p[0-9]+}}.b
         return Sve.ConditionalSelect(
             Sve.CreateFalseMaskInt16(),
             Sve.BitwiseClear(Sve.CreateTrueMaskInt16(), Sve.CreateTrueMaskInt16()),
@@ -106,7 +97,6 @@ static Vector<short> BitwiseClear()
     [MethodImpl(MethodImplOptions.NoInlining)]
     static Vector<int> Xor()
     {
-        //ARM64-FULL-LINE: eor {{p[0-9]+}}.b, {{p[0-9]+}}/z, {{p[0-9]+}}.b, {{p[0-9]+}}.b
         return Sve.ConditionalSelect(
             Sve.CreateTrueMaskInt32(),
             Sve.Xor(Sve.CreateTrueMaskInt32(), Sve.CreateTrueMaskInt32()),
@@ -117,7 +107,6 @@ static Vector<int> Xor()
     [MethodImpl(MethodImplOptions.NoInlining)]
     static Vector<short> Or()
     {
-        //ARM64-FULL-LINE: orr {{p[0-9]+}}.b, {{p[0-9]+}}/z, {{p[0-9]+}}.b, {{p[0-9]+}}.b
         return Sve.ConditionalSelect(
             Sve.CreateTrueMaskInt16(),
             Sve.Or(Sve.CreateTrueMaskInt16(), Sve.CreateTrueMaskInt16()),
@@ -128,7 +117,6 @@ static Vector<short> Or()
     [MethodImpl(MethodImplOptions.NoInlining)]
     static Vector<int> ConditionalSelect()
     {
-        //ARM64-FULL-LINE: sel {{p[0-9]+}}.b, {{p[0-9]+}}, {{p[0-9]+}}.b, {{p[0-9]+}}.b
         return Sve.ConditionalSelect(
             Vector<int>.Zero,
             Sve.CreateFalseMaskInt32(),