@@ -20861,12 +20861,6 @@ GenTree* Compiler::gtNewSimdBinOpNode(
                     }
                 }
             }
-
-            if (op == GT_AND_NOT)
-            {
-                // GT_AND_NOT expects `op1 & ~op2`, but xarch does `~op1 & op2`
-                needsReverseOps = true;
-            }
             break;
         }
 #endif // TARGET_XARCH
@@ -20897,11 +20891,34 @@ GenTree* Compiler::gtNewSimdBinOpNode(
 
     if (intrinsic != NI_Illegal)
     {
+        if (op == GT_AND_NOT)
+        {
+            assert(fgNodeThreading == NodeThreading::LIR);
+
+#if defined(TARGET_XARCH)
+            // GT_AND_NOT expects `op1 & ~op2`, but xarch does `~op1 & op2`
+            // We specially handle this here since we're only producing a
+            // native intrinsic node in LIR
+
+            std::swap(op1, op2);
+#endif // TARGET_XARCH
+        }
         return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize);
     }
 
     switch (op)
     {
+        case GT_AND_NOT:
+        {
+            // Prior to LIR, we want to explicitly decompose this operation so that downstream phases can
+            // appropriately optimize around the individual operations being performed, particularly ~op2,
+            // and produce overall better codegen.
+            assert(fgNodeThreading != NodeThreading::LIR);
+
+            op2 = gtNewSimdUnOpNode(GT_NOT, type, op2, simdBaseJitType, simdSize);
+            return gtNewSimdBinOpNode(GT_AND, type, op1, op2, simdBaseJitType, simdSize);
+        }
+
 #if defined(TARGET_XARCH)
         case GT_RSZ:
         {
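As a rough scalar sketch of the semantics at play (illustrative only; the helper names below are hypothetical and not part of the JIT sources): GT_AND_NOT in the IR means `op1 & ~op2`, while the xarch ANDN/PANDN instructions compute `~op1 & op2`, so swapping the operands before emitting the native intrinsic in LIR preserves the IR meaning, and the pre-LIR decomposition into GT_NOT followed by GT_AND computes the same value.

#include <cassert>
#include <cstdint>

// Hypothetical helpers, for illustration only.
static uint64_t IrAndNot(uint64_t op1, uint64_t op2)
{
    return op1 & ~op2; // GT_AND_NOT semantics
}

static uint64_t XarchAndNot(uint64_t op1, uint64_t op2)
{
    return ~op1 & op2; // ANDN / PANDN semantics
}

int main()
{
    uint64_t a = 0xF0F0;
    uint64_t b = 0x00FF;

    assert(IrAndNot(a, b) == XarchAndNot(b, a)); // operand swap preserves the IR meaning
    assert(IrAndNot(a, b) == (a & ~b));          // NOT + AND decomposition computes the same value
    return 0;
}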
@@ -21066,9 +21083,6 @@ GenTree* Compiler::gtNewSimdBinOpNode(
                     vecCon1->gtSimdVal.u64[i] = 0x00FF00FF00FF00FF;
                 }
 
-                // Validate we can't use AVX512F_VL_TernaryLogic here
-                assert(!canUseEvexEncodingDebugOnly());
-
                 // Vector256<short> maskedProduct = Avx2.And(widenedProduct, vecCon1).AsInt16()
                 GenTree* maskedProduct = gtNewSimdBinOpNode(GT_AND, widenedType, widenedProduct, vecCon1,
                                                             widenedSimdBaseJitType, widenedSimdSize);
@@ -22033,9 +22047,6 @@ GenTree* Compiler::gtNewSimdCmpOpNode(
                 v = gtNewSimdHWIntrinsicNode(type, v, gtNewIconNode(SHUFFLE_ZZXX, TYP_INT), NI_SSE2_Shuffle,
                                              CORINFO_TYPE_INT, simdSize);
 
-                // Validate we can't use AVX512F_VL_TernaryLogic here
-                assert(!canUseEvexEncodingDebugOnly());
-
                 op2 = gtNewSimdBinOpNode(GT_AND, type, u, v, simdBaseJitType, simdSize);
                 return gtNewSimdBinOpNode(GT_OR, type, op1, op2, simdBaseJitType, simdSize);
             }
@@ -24146,9 +24157,6 @@ GenTree* Compiler::gtNewSimdNarrowNode(
 
                 GenTree* vecCon2 = gtCloneExpr(vecCon1);
 
-                // Validate we can't use AVX512F_VL_TernaryLogic here
-                assert(!canUseEvexEncodingDebugOnly());
-
                 tmp1 = gtNewSimdBinOpNode(GT_AND, type, op1, vecCon1, simdBaseJitType, simdSize);
                 tmp2 = gtNewSimdBinOpNode(GT_AND, type, op2, vecCon2, simdBaseJitType, simdSize);
                 tmp3 = gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_AVX2_PackUnsignedSaturate, CORINFO_TYPE_UBYTE,
@@ -24187,9 +24195,6 @@ GenTree* Compiler::gtNewSimdNarrowNode(
 
                 GenTree* vecCon2 = gtCloneExpr(vecCon1);
 
-                // Validate we can't use AVX512F_VL_TernaryLogic here
-                assert(!canUseEvexEncodingDebugOnly());
-
                 tmp1 = gtNewSimdBinOpNode(GT_AND, type, op1, vecCon1, simdBaseJitType, simdSize);
                 tmp2 = gtNewSimdBinOpNode(GT_AND, type, op2, vecCon2, simdBaseJitType, simdSize);
                 tmp3 = gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_AVX2_PackUnsignedSaturate, CORINFO_TYPE_USHORT,
@@ -24291,9 +24296,6 @@ GenTree* Compiler::gtNewSimdNarrowNode(
 
                 GenTree* vecCon2 = gtCloneExpr(vecCon1);
 
-                // Validate we can't use AVX512F_VL_TernaryLogic here
-                assert(!canUseEvexEncodingDebugOnly());
-
                 tmp1 = gtNewSimdBinOpNode(GT_AND, type, op1, vecCon1, simdBaseJitType, simdSize);
                 tmp2 = gtNewSimdBinOpNode(GT_AND, type, op2, vecCon2, simdBaseJitType, simdSize);
 
@@ -24330,9 +24332,6 @@ GenTree* Compiler::gtNewSimdNarrowNode(
 
                 GenTree* vecCon2 = gtCloneExpr(vecCon1);
 
-                // Validate we can't use AVX512F_VL_TernaryLogic here
-                assert(!canUseEvexEncodingDebugOnly());
-
                 tmp1 = gtNewSimdBinOpNode(GT_AND, type, op1, vecCon1, simdBaseJitType, simdSize);
                 tmp2 = gtNewSimdBinOpNode(GT_AND, type, op2, vecCon2, simdBaseJitType, simdSize);
 
@@ -27821,6 +27820,14 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp,
             assert(!isScalar);
             assert(op2->TypeIs(simdType));
 
+            if (comp->fgNodeThreading != NodeThreading::LIR)
+            {
+                // We don't want to support creating AND_NOT nodes prior to LIR
+                // as it can break important optimizations. We'll produce this
+                // in lowering instead.
+                break;
+            }
+
 #if defined(TARGET_XARCH)
             if (simdSize == 64)
             {
@@ -30155,13 +30162,8 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
     bool       isScalar = false;
     genTreeOps oper     = tree->GetOperForHWIntrinsicId(&isScalar);
 
-#if defined(TARGET_XARCH)
-    if (oper == GT_AND_NOT)
-    {
-        // xarch does: ~op1 & op2, we need op1 & ~op2
-        std::swap(op1, op2);
-    }
-#endif // TARGET_XARCH
+    // We shouldn't find AND_NOT nodes since they should only be produced in lowering
+    assert(oper != GT_AND_NOT);
 
     GenTree* cnsNode   = nullptr;
     GenTree* otherNode = nullptr;
@@ -30674,31 +30676,6 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
             break;
         }
 
-        case GT_AND_NOT:
-        {
-            // Handle `x & ~0 == x` and `0 & ~x == 0`
-            if (cnsNode->IsVectorZero())
-            {
-                if (cnsNode == op1)
-                {
-                    resultNode = gtWrapWithSideEffects(cnsNode, otherNode, GTF_ALL_EFFECT);
-                    break;
-                }
-                else
-                {
-                    resultNode = otherNode;
-                }
-                break;
-            }
-
-            // Handle `x & ~AllBitsSet == 0`
-            if (cnsNode->IsVectorAllBitsSet() && (cnsNode == op2))
-            {
-                resultNode = gtWrapWithSideEffects(cnsNode, otherNode, GTF_ALL_EFFECT);
-            }
-            break;
-        }
-
         case GT_DIV:
         {
             if (varTypeIsFloating(simdBaseType))
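With AND_NOT no longer reaching this folding code, the identities the deleted case handled still fall out of the decomposed form: GT_NOT over a vector constant folds to a constant, and the existing GT_AND cases then apply. A scalar sketch of those identities (illustrative only, not JIT code):

#include <cassert>
#include <cstdint>

int main()
{
    uint64_t x          = 0x1234;
    uint64_t zero       = 0;
    uint64_t allBitsSet = ~static_cast<uint64_t>(0);

    assert((x & ~zero) == x);       // x & ~0 == x
    assert((zero & ~x) == 0);       // 0 & ~x == 0
    assert((x & ~allBitsSet) == 0); // x & ~AllBitsSet == 0
    return 0;
}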
@@ -31089,12 +31066,12 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
        {
            switch (ni)
            {
-                case NI_Vector128_ConditionalSelect:
 #if defined(TARGET_XARCH)
+                case NI_Vector128_ConditionalSelect:
                 case NI_Vector256_ConditionalSelect:
                 case NI_Vector512_ConditionalSelect:
 #elif defined(TARGET_ARM64)
-                case NI_Vector64_ConditionalSelect:
+                case NI_AdvSimd_BitwiseSelect:
                 case NI_Sve_ConditionalSelect:
 #endif
                 {