-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[DAG] Add ISD::FP_TO_SINT_SAT/FP_TO_UINT_SAT handling to SelectionDAG::canCreateUndefOrPoison #154244
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
…FP_TO_UINT_SAT handling + tests
|
@llvm/pr-subscribers-backend-x86 Author: Ye Tian (TianYe717). Changes: Related to issue #153366. Full diff: https://github.com/llvm/llvm-project/pull/154244.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 84282d8a1c37b..f2cb0c600910e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5748,6 +5748,8 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
case ISD::FMA:
case ISD::FMAD:
case ISD::FP_EXTEND:
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
// No poison except from flags (which is handled above)
return false;
diff --git a/llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll b/llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll
index 7f6d64c21724a..a251e43e4295b 100644
--- a/llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll
+++ b/llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll
@@ -57,6 +57,52 @@ define <4 x i1> @test_signed_v4i1_v4f32(<4 x float> %f) nounwind {
ret <4 x i1> %x
}
+define <4 x i1> @test_freeze_signed_v4i1_v4f32(<4 x float> %f) nounwind {
+; CHECK-LABEL: test_freeze_signed_v4i1_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movaps %xmm0, %xmm1
+; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3]
+; CHECK-NEXT: movss {{.*#+}} xmm2 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: ucomiss %xmm1, %xmm1
+; CHECK-NEXT: maxss %xmm2, %xmm1
+; CHECK-NEXT: xorps %xmm3, %xmm3
+; CHECK-NEXT: minss %xmm3, %xmm1
+; CHECK-NEXT: cvttss2si %xmm1, %ecx
+; CHECK-NEXT: cmovpl %eax, %ecx
+; CHECK-NEXT: movd %ecx, %xmm1
+; CHECK-NEXT: movaps %xmm0, %xmm4
+; CHECK-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1]
+; CHECK-NEXT: ucomiss %xmm4, %xmm4
+; CHECK-NEXT: maxss %xmm2, %xmm4
+; CHECK-NEXT: minss %xmm3, %xmm4
+; CHECK-NEXT: cvttss2si %xmm4, %ecx
+; CHECK-NEXT: cmovpl %eax, %ecx
+; CHECK-NEXT: movd %ecx, %xmm4
+; CHECK-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
+; CHECK-NEXT: movaps %xmm0, %xmm1
+; CHECK-NEXT: maxss %xmm2, %xmm1
+; CHECK-NEXT: minss %xmm3, %xmm1
+; CHECK-NEXT: cvttss2si %xmm1, %ecx
+; CHECK-NEXT: ucomiss %xmm0, %xmm0
+; CHECK-NEXT: cmovpl %eax, %ecx
+; CHECK-NEXT: movd %ecx, %xmm1
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; CHECK-NEXT: ucomiss %xmm0, %xmm0
+; CHECK-NEXT: maxss %xmm2, %xmm0
+; CHECK-NEXT: minss %xmm3, %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %ecx
+; CHECK-NEXT: cmovpl %eax, %ecx
+; CHECK-NEXT: movd %ecx, %xmm0
+; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm4[0]
+; CHECK-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %x = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f32(<4 x float> %f)
+ %y = freeze <4 x i1> %x
+ ret <4 x i1> %y
+}
+
define <4 x i8> @test_signed_v4i8_v4f32(<4 x float> %f) nounwind {
; CHECK-LABEL: test_signed_v4i8_v4f32:
; CHECK: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/fptoui-sat-vector-128.ll b/llvm/test/CodeGen/X86/fptoui-sat-vector-128.ll
index ffbdd66529f5c..7b1db5c29abaf 100644
--- a/llvm/test/CodeGen/X86/fptoui-sat-vector-128.ll
+++ b/llvm/test/CodeGen/X86/fptoui-sat-vector-128.ll
@@ -48,6 +48,43 @@ define <4 x i1> @test_unsigned_v4i1_v4f32(<4 x float> %f) nounwind {
ret <4 x i1> %x
}
+define <4 x i1> @test_freeze_unsigned_v4i1_v4f32(<4 x float> %f) nounwind {
+; CHECK-LABEL: test_freeze_unsigned_v4i1_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movaps %xmm0, %xmm1
+; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3]
+; CHECK-NEXT: xorps %xmm2, %xmm2
+; CHECK-NEXT: maxss %xmm2, %xmm1
+; CHECK-NEXT: movss {{.*#+}} xmm3 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: minss %xmm3, %xmm1
+; CHECK-NEXT: cvttss2si %xmm1, %eax
+; CHECK-NEXT: movd %eax, %xmm1
+; CHECK-NEXT: movaps %xmm0, %xmm4
+; CHECK-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1]
+; CHECK-NEXT: maxss %xmm2, %xmm4
+; CHECK-NEXT: minss %xmm3, %xmm4
+; CHECK-NEXT: cvttss2si %xmm4, %eax
+; CHECK-NEXT: movd %eax, %xmm4
+; CHECK-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
+; CHECK-NEXT: movaps %xmm0, %xmm1
+; CHECK-NEXT: maxss %xmm2, %xmm1
+; CHECK-NEXT: minss %xmm3, %xmm1
+; CHECK-NEXT: cvttss2si %xmm1, %eax
+; CHECK-NEXT: movd %eax, %xmm1
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; CHECK-NEXT: maxss %xmm2, %xmm0
+; CHECK-NEXT: minss %xmm3, %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm4[0]
+; CHECK-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %x = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f32(<4 x float> %f)
+ %y = freeze <4 x i1> %x
+ ret <4 x i1> %y
+}
+
define <4 x i8> @test_unsigned_v4i8_v4f32(<4 x float> %f) nounwind {
; CHECK-LABEL: test_unsigned_v4i8_v4f32:
; CHECK: # %bb.0:
|
|
@llvm/pr-subscribers-llvm-selectiondag Author: Ye Tian (TianYe717). Changes: Related to issue #153366. Full diff: https://github.com/llvm/llvm-project/pull/154244.diff 3 Files Affected:
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 84282d8a1c37b..f2cb0c600910e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -5748,6 +5748,8 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts,
case ISD::FMA:
case ISD::FMAD:
case ISD::FP_EXTEND:
+ case ISD::FP_TO_SINT_SAT:
+ case ISD::FP_TO_UINT_SAT:
// No poison except from flags (which is handled above)
return false;
diff --git a/llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll b/llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll
index 7f6d64c21724a..a251e43e4295b 100644
--- a/llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll
+++ b/llvm/test/CodeGen/X86/fptosi-sat-vector-128.ll
@@ -57,6 +57,52 @@ define <4 x i1> @test_signed_v4i1_v4f32(<4 x float> %f) nounwind {
ret <4 x i1> %x
}
+define <4 x i1> @test_freeze_signed_v4i1_v4f32(<4 x float> %f) nounwind {
+; CHECK-LABEL: test_freeze_signed_v4i1_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movaps %xmm0, %xmm1
+; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3]
+; CHECK-NEXT: movss {{.*#+}} xmm2 = [-1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: xorl %eax, %eax
+; CHECK-NEXT: ucomiss %xmm1, %xmm1
+; CHECK-NEXT: maxss %xmm2, %xmm1
+; CHECK-NEXT: xorps %xmm3, %xmm3
+; CHECK-NEXT: minss %xmm3, %xmm1
+; CHECK-NEXT: cvttss2si %xmm1, %ecx
+; CHECK-NEXT: cmovpl %eax, %ecx
+; CHECK-NEXT: movd %ecx, %xmm1
+; CHECK-NEXT: movaps %xmm0, %xmm4
+; CHECK-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1]
+; CHECK-NEXT: ucomiss %xmm4, %xmm4
+; CHECK-NEXT: maxss %xmm2, %xmm4
+; CHECK-NEXT: minss %xmm3, %xmm4
+; CHECK-NEXT: cvttss2si %xmm4, %ecx
+; CHECK-NEXT: cmovpl %eax, %ecx
+; CHECK-NEXT: movd %ecx, %xmm4
+; CHECK-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
+; CHECK-NEXT: movaps %xmm0, %xmm1
+; CHECK-NEXT: maxss %xmm2, %xmm1
+; CHECK-NEXT: minss %xmm3, %xmm1
+; CHECK-NEXT: cvttss2si %xmm1, %ecx
+; CHECK-NEXT: ucomiss %xmm0, %xmm0
+; CHECK-NEXT: cmovpl %eax, %ecx
+; CHECK-NEXT: movd %ecx, %xmm1
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; CHECK-NEXT: ucomiss %xmm0, %xmm0
+; CHECK-NEXT: maxss %xmm2, %xmm0
+; CHECK-NEXT: minss %xmm3, %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %ecx
+; CHECK-NEXT: cmovpl %eax, %ecx
+; CHECK-NEXT: movd %ecx, %xmm0
+; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm4[0]
+; CHECK-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %x = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f32(<4 x float> %f)
+ %y = freeze <4 x i1> %x
+ ret <4 x i1> %y
+}
+
define <4 x i8> @test_signed_v4i8_v4f32(<4 x float> %f) nounwind {
; CHECK-LABEL: test_signed_v4i8_v4f32:
; CHECK: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/fptoui-sat-vector-128.ll b/llvm/test/CodeGen/X86/fptoui-sat-vector-128.ll
index ffbdd66529f5c..7b1db5c29abaf 100644
--- a/llvm/test/CodeGen/X86/fptoui-sat-vector-128.ll
+++ b/llvm/test/CodeGen/X86/fptoui-sat-vector-128.ll
@@ -48,6 +48,43 @@ define <4 x i1> @test_unsigned_v4i1_v4f32(<4 x float> %f) nounwind {
ret <4 x i1> %x
}
+define <4 x i1> @test_freeze_unsigned_v4i1_v4f32(<4 x float> %f) nounwind {
+; CHECK-LABEL: test_freeze_unsigned_v4i1_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movaps %xmm0, %xmm1
+; CHECK-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,3],xmm0[3,3]
+; CHECK-NEXT: xorps %xmm2, %xmm2
+; CHECK-NEXT: maxss %xmm2, %xmm1
+; CHECK-NEXT: movss {{.*#+}} xmm3 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
+; CHECK-NEXT: minss %xmm3, %xmm1
+; CHECK-NEXT: cvttss2si %xmm1, %eax
+; CHECK-NEXT: movd %eax, %xmm1
+; CHECK-NEXT: movaps %xmm0, %xmm4
+; CHECK-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1]
+; CHECK-NEXT: maxss %xmm2, %xmm4
+; CHECK-NEXT: minss %xmm3, %xmm4
+; CHECK-NEXT: cvttss2si %xmm4, %eax
+; CHECK-NEXT: movd %eax, %xmm4
+; CHECK-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1]
+; CHECK-NEXT: movaps %xmm0, %xmm1
+; CHECK-NEXT: maxss %xmm2, %xmm1
+; CHECK-NEXT: minss %xmm3, %xmm1
+; CHECK-NEXT: cvttss2si %xmm1, %eax
+; CHECK-NEXT: movd %eax, %xmm1
+; CHECK-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
+; CHECK-NEXT: maxss %xmm2, %xmm0
+; CHECK-NEXT: minss %xmm3, %xmm0
+; CHECK-NEXT: cvttss2si %xmm0, %eax
+; CHECK-NEXT: movd %eax, %xmm0
+; CHECK-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm4[0]
+; CHECK-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NEXT: retq
+ %x = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f32(<4 x float> %f)
+ %y = freeze <4 x i1> %x
+ ret <4 x i1> %y
+}
+
define <4 x i8> @test_unsigned_v4i8_v4f32(<4 x float> %f) nounwind {
; CHECK-LABEL: test_unsigned_v4i8_v4f32:
; CHECK: # %bb.0:
|
|
@arsenm Thanks for the approval! I don’t have commit access — could you please help merge this PR? |
This PR resolves #153366