-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[AArch64] Simplify some masked integer comparisons. #153783
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-aarch64 Author: Ricardo Jesus (rj-jesus) Changes: Specifically, this improves the comparison in isinf, for example: int isinf(float x) {
return __builtin_isinf(x);
} Before:
After:
I've added this in performCONDCombine. Full diff: https://github.com/llvm/llvm-project/pull/153783.diff 3 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 224bbe7e38a19..7e51df5b93266 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -25037,6 +25037,32 @@ SDValue performCONDCombine(SDNode *N,
CmpIndex, CC))
return Val;
+ // X & M ?= C --> (C << clz(M)) ?= (X << clz(M)) where M is a non-empty
+ // sequence of ones starting at the least significant bit with the remainder
+ // zero and C is a constant s.t. (C & ~M) == 0 that cannot be materialised
+ // into a SUBS (immediate). The transformed form can be matched into a SUBS
+ // (shifted register).
+ if ((CC == AArch64CC::EQ || CC == AArch64CC::NE) && AndNode->hasOneUse() &&
+ isa<ConstantSDNode>(AndNode->getOperand(1)) &&
+ isa<ConstantSDNode>(SubsNode->getOperand(1))) {
+ SDValue X = AndNode->getOperand(0);
+ APInt M = AndNode->getConstantOperandAPInt(1);
+ APInt C = SubsNode->getConstantOperandAPInt(1);
+
+ if (M.isMask() && !(C & ~M) && (C & 0xfff) != C && (C & 0xfff000) != C) {
+ SDLoc DL(SubsNode);
+ EVT VT = SubsNode->getValueType(0);
+ unsigned ShiftAmt = M.countl_zero();
+ SDValue ShiftedX = DAG.getNode(
+ ISD::SHL, DL, VT, X, DAG.getShiftAmountConstant(ShiftAmt, VT, DL));
+ SDValue ShiftedC = DAG.getConstant(C << ShiftAmt, DL, VT);
+ SDValue NewSubs = DAG.getNode(AArch64ISD::SUBS, DL, SubsNode->getVTList(),
+ ShiftedC, ShiftedX);
+ DCI.CombineTo(SubsNode, NewSubs, NewSubs.getValue(1));
+ return SDValue(N, 0);
+ }
+ }
+
if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndNode->getOperand(1))) {
uint32_t CNV = CN->getZExtValue();
if (CNV == 255)
diff --git a/llvm/test/CodeGen/AArch64/isinf.ll b/llvm/test/CodeGen/AArch64/isinf.ll
index e68539bcf07d9..e8bbaf96395f0 100644
--- a/llvm/test/CodeGen/AArch64/isinf.ll
+++ b/llvm/test/CodeGen/AArch64/isinf.ll
@@ -27,9 +27,8 @@ define i32 @replace_isinf_call_f32(float %x) {
; CHECK-LABEL: replace_isinf_call_f32:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov w9, s0
-; CHECK-NEXT: mov w8, #2139095040 // =0x7f800000
-; CHECK-NEXT: and w9, w9, #0x7fffffff
-; CHECK-NEXT: cmp w9, w8
+; CHECK-NEXT: mov w8, #-16777216 // =0xff000000
+; CHECK-NEXT: cmp w8, w9, lsl #1
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%abs = tail call float @llvm.fabs.f32(float %x)
@@ -43,9 +42,8 @@ define i32 @replace_isinf_call_f64(double %x) {
; CHECK-LABEL: replace_isinf_call_f64:
; CHECK: // %bb.0:
; CHECK-NEXT: fmov x9, d0
-; CHECK-NEXT: mov x8, #9218868437227405312 // =0x7ff0000000000000
-; CHECK-NEXT: and x9, x9, #0x7fffffffffffffff
-; CHECK-NEXT: cmp x9, x8
+; CHECK-NEXT: mov x8, #-9007199254740992 // =0xffe0000000000000
+; CHECK-NEXT: cmp x8, x9, lsl #1
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: ret
%abs = tail call double @llvm.fabs.f64(double %x)
diff --git a/llvm/test/CodeGen/AArch64/masked-integer-compare.ll b/llvm/test/CodeGen/AArch64/masked-integer-compare.ll
new file mode 100644
index 0000000000000..363cd10c78a94
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/masked-integer-compare.ll
@@ -0,0 +1,178 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -o -| FileCheck %s
+
+; Test code generation support for SUBS (shifted register) from masked integer
+; compare sequences. These sequences appear in isinf tests, for example.
+
+define i1 @combine_masked_i32(i32 %x) {
+; CHECK-LABEL: combine_masked_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #-16777216 // =0xff000000
+; CHECK-NEXT: cmp w8, w0, lsl #1
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %and = and i32 %x, u0x7fffffff
+ %sub = sub i32 %and, u0x7f800000
+ %cmp = icmp eq i32 %sub, 0
+ ret i1 %cmp
+}
+
+define i1 @combine_masked_i64(i64 %x) {
+; CHECK-LABEL: combine_masked_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x8, #-9007199254740992 // =0xffe0000000000000
+; CHECK-NEXT: cmp x8, x0, lsl #1
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %and = and i64 %x, u0x7fffffffffffffff
+ %sub = sub i64 %and, u0x7ff0000000000000
+ %cmp = icmp eq i64 %sub, 0
+ ret i1 %cmp
+}
+
+define i1 @combine_masked_ne(i32 %x) {
+; CHECK-LABEL: combine_masked_ne:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #-16777216 // =0xff000000
+; CHECK-NEXT: cmp w8, w0, lsl #1
+; CHECK-NEXT: cset w0, ne
+; CHECK-NEXT: ret
+ %and = and i32 %x, u0x7fffffff
+ %cmp = icmp ne i32 %and, u0x7f800000
+ ret i1 %cmp
+}
+
+define i1 @combine_masked_lsl4(i32 %x) {
+; CHECK-LABEL: combine_masked_lsl4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #-134217728 // =0xf8000000
+; CHECK-NEXT: cmp w8, w0, lsl #4
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %and = and i32 %x, u0x0fffffff
+ %cmp = icmp eq i32 %and, u0x0f800000
+ ret i1 %cmp
+}
+
+define i1 @dont_combine_not_mask(i32 %x) {
+; CHECK-LABEL: dont_combine_not_mask:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #2139095040 // =0x7f800000
+; CHECK-NEXT: and w9, w0, #0x7ffffffe
+; CHECK-NEXT: cmp w9, w8
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %and = and i32 %x, u0x7ffffffe
+ %cmp = icmp eq i32 %and, u0x7f800000
+ ret i1 %cmp
+}
+
+define i1 @dont_combine_cmp_not_masked(i32 %x) {
+; CHECK-LABEL: dont_combine_cmp_not_masked:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #2139095040 // =0x7f800000
+; CHECK-NEXT: and w9, w0, #0x3fffffff
+; CHECK-NEXT: cmp w9, w8
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %and = and i32 %x, u0x3fffffff
+ %cmp = icmp eq i32 %and, u0x7f800000
+ ret i1 %cmp
+}
+
+define i1 @dont_combine_not_constant_mask(i32 %x, i32 %m) {
+; CHECK-LABEL: dont_combine_not_constant_mask:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #2139095040 // =0x7f800000
+; CHECK-NEXT: and w9, w0, w1
+; CHECK-NEXT: cmp w9, w8
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %and = and i32 %x, %m
+ %cmp = icmp eq i32 %and, u0x7f800000
+ ret i1 %cmp
+}
+
+define i1 @dont_combine_not_constant_cmp(i32 %x, i32 %c) {
+; CHECK-LABEL: dont_combine_not_constant_cmp:
+; CHECK: // %bb.0:
+; CHECK-NEXT: and w8, w0, #0xfffffff
+; CHECK-NEXT: cmp w8, w1
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %and = and i32 %x, u0x0fffffff
+ %cmp = icmp eq i32 %and, %c
+ ret i1 %cmp
+}
+
+define i1 @dont_combine_subs_imm(i32 %x) {
+; CHECK-LABEL: dont_combine_subs_imm:
+; CHECK: // %bb.0:
+; CHECK-NEXT: and w8, w0, #0x7fffffff
+; CHECK-NEXT: cmp w8, #291
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %and = and i32 %x, u0x7fffffff
+ %cmp = icmp eq i32 %and, u0x123
+ ret i1 %cmp
+}
+
+define i1 @dont_combine_subs_imm_lsl12(i32 %x) {
+; CHECK-LABEL: dont_combine_subs_imm_lsl12:
+; CHECK: // %bb.0:
+; CHECK-NEXT: and w8, w0, #0x7fffffff
+; CHECK-NEXT: cmp w8, #291, lsl #12 // =1191936
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: ret
+ %and = and i32 %x, u0x7fffffff
+ %cmp = icmp eq i32 %and, u0x123000
+ ret i1 %cmp
+}
+
+define { i1, i1 } @dont_combine_multi_use_cmp(i32 %x) {
+; CHECK-LABEL: dont_combine_multi_use_cmp:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #2139095040 // =0x7f800000
+; CHECK-NEXT: and w9, w0, #0x7fffffff
+; CHECK-NEXT: cmp w9, w8
+; CHECK-NEXT: cset w0, eq
+; CHECK-NEXT: cset w1, lt
+; CHECK-NEXT: ret
+ %and = and i32 %x, u0x7fffffff
+ %eq = icmp eq i32 %and, u0x7f800000
+ %lt = icmp slt i32 %and, u0x7f800000
+ %r1 = insertvalue { i1, i1 } poison, i1 %eq, 0
+ %r2 = insertvalue { i1, i1 } %r1, i1 %lt, 1
+ ret { i1, i1 } %r2
+}
+
+define { i32, i1 } @dont_combine_multi_use_sub(i32 %x) {
+; CHECK-LABEL: dont_combine_multi_use_sub:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #-2139095040 // =0x80800000
+; CHECK-NEXT: and w9, w0, #0x7fffffff
+; CHECK-NEXT: adds w0, w9, w8
+; CHECK-NEXT: cset w1, eq
+; CHECK-NEXT: ret
+ %and = and i32 %x, u0x7fffffff
+ %sub = sub i32 %and, u0x7f800000
+ %cmp = icmp eq i32 %sub, 0
+ %r1 = insertvalue { i32, i1 } poison, i32 %sub, 0
+ %r2 = insertvalue { i32, i1 } %r1, i1 %cmp, 1
+ ret { i32, i1 } %r2
+}
+
+define { i32, i1 } @dont_combine_multi_use_and(i32 %x) {
+; CHECK-LABEL: dont_combine_multi_use_and:
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov w8, #2139095040 // =0x7f800000
+; CHECK-NEXT: and w0, w0, #0x7fffffff
+; CHECK-NEXT: cmp w0, w8
+; CHECK-NEXT: cset w1, eq
+; CHECK-NEXT: ret
+ %and = and i32 %x, u0x7fffffff
+ %cmp = icmp eq i32 %and, u0x7f800000
+ %r1 = insertvalue { i32, i1 } poison, i32 %and, 0
+ %r2 = insertvalue { i32, i1 } %r1, i1 %cmp, 1
+ ret { i32, i1 } %r2
+}
|
Ping :) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
A couple of suggestions but otherwise this looks good to me.
Specifically, X & M ?= C --> (C << clz(M)) ?= (X << clz(M)) where M is a non-empty sequence of ones starting at the least significant bit with the remainder zero and C is a constant s.t. (C & ~M) == 0 that cannot be materialised into a SUBS (immediate). Proof: https://alive2.llvm.org/ce/z/haqdJ4 This improves the comparison in isinf, for example: ```cpp int isinf(float x) { return __builtin_isinf(x); } ``` Before: ``` isinf: fmov w9, s0 mov w8, #2139095040 and w9, w9, #0x7fffffff cmp w9, w8 cset w0, eq ret ``` After: ``` isinf: fmov w9, s0 mov w8, #-16777216 cmp w8, w9, lsl llvm#1 cset w0, eq ret ``` I've added this in `performCONDCombine`, but please let me know if there's a better place for it.
5c1279a
to
affca37
Compare
Specifically, `X & M ?= C --> (C << clz(M)) ?= (X << clz(M))` where M is a non-empty sequence of ones starting at the least significant bit with the remainder zero and C is a constant subset of M that cannot be materialised into a SUBS (immediate). Proof: https://alive2.llvm.org/ce/z/haqdJ4. This improves the comparison in isinf, for example: ```cpp int isinf(float x) { return __builtin_isinf(x); } ``` Before: ``` isinf: fmov w9, s0 mov w8, #2139095040 and w9, w9, #0x7fffffff cmp w9, w8 cset w0, eq ret ``` After: ``` isinf: fmov w9, s0 mov w8, #-16777216 cmp w8, w9, lsl #1 cset w0, eq ret ```
Specifically,
X & M ?= C --> (C << clz(M)) ?= (X << clz(M))
where M is a non-empty sequence of ones starting at the least significant bit with the remainder zero and C is a constant(C & ~M) == 0
that cannot be materialised into a SUBS (immediate). Proof: https://alive2.llvm.org/ce/z/haqdJ4. This improves the comparison in isinf, for example:
Before:
After:
I've added this in
performCONDCombine
for the time being, but please let me know if there's a better place for it.