-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[RISCV] Add BREV8 and ORC_B to hasAllNBitUsers in RISCVOptWInstrs. #148076
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-backend-risc-v Author: Craig Topper (topperc) Changes: These were removed in #147830 due to ignoring that these instructions operate on bytes. This patch adds them back with tests, including a test for the byte boundary issue. I separated out the commits to show the bad optimization if we don't round Bits down to the nearest byte. Full diff: https://github.com/llvm/llvm-project/pull/148076.diff 2 Files Affected:
diff --git a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
index 24c05a2f807d0..d257f56cf4129 100644
--- a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
+++ b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
@@ -323,6 +323,12 @@ static bool hasAllNBitUsers(const MachineInstr &OrigMI,
Worklist.push_back(std::make_pair(UserMI, Bits));
break;
+ case RISCV::BREV8:
+ case RISCV::ORC_B:
+ // BREV8 and ORC_B work on bytes. Round Bits down to the nearest byte.
+ Worklist.push_back(std::make_pair(UserMI, alignDown(Bits, 8)));
+ break;
+
case RISCV::PseudoCCMOVGPR:
case RISCV::PseudoCCMOVGPRNoX0:
// Either operand 4 or operand 5 is returned by this instruction. If
diff --git a/llvm/test/CodeGen/RISCV/sextw-removal.ll b/llvm/test/CodeGen/RISCV/sextw-removal.ll
index 1a978d1a0fcac..9c8230572b926 100644
--- a/llvm/test/CodeGen/RISCV/sextw-removal.ll
+++ b/llvm/test/CodeGen/RISCV/sextw-removal.ll
@@ -1,9 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+f,+zknh,+v -target-abi=lp64f \
; RUN: | FileCheck %s --check-prefixes=CHECK,RV64I
-; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zba,+zbb,+f,+zknh,+v -target-abi=lp64f \
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zba,+zbb,+zbkb,+f,+zknh,+v -target-abi=lp64f \
; RUN: | FileCheck %s --check-prefixes=CHECK,RV64ZBB
-; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zba,+zbb,+f,+zknh,+v -target-abi=lp64f \
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zba,+zbb,+zbkb,+f,+zknh,+v -target-abi=lp64f \
; RUN: -riscv-disable-sextw-removal | FileCheck %s --check-prefix=NOREMOVAL
define void @test1(i32 signext %arg, i32 signext %arg1) nounwind {
@@ -1499,3 +1499,186 @@ bb7: ; preds = %bb2
}
declare i32 @llvm.riscv.vmv.x.s.nxv1i32( <vscale x 1 x i32>)
+
+; Test that we can look through brev8 in hasAllNBitUsers.
+define signext i32 @test21(i64 %arg1, i64 %arg2, i64 %arg3) {
+; RV64I-LABEL: test21:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: lui a3, 61681
+; RV64I-NEXT: lui a4, 209715
+; RV64I-NEXT: addi a3, a3, -241
+; RV64I-NEXT: addi a4, a4, 819
+; RV64I-NEXT: slli a5, a3, 32
+; RV64I-NEXT: add a3, a3, a5
+; RV64I-NEXT: slli a5, a4, 32
+; RV64I-NEXT: add a4, a4, a5
+; RV64I-NEXT: li a5, 256
+; RV64I-NEXT: .LBB25_1: # %bb2
+; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT: srli a6, a0, 4
+; RV64I-NEXT: and a0, a0, a3
+; RV64I-NEXT: and a6, a6, a3
+; RV64I-NEXT: slli a0, a0, 4
+; RV64I-NEXT: or a0, a6, a0
+; RV64I-NEXT: srli a6, a0, 2
+; RV64I-NEXT: and a0, a0, a4
+; RV64I-NEXT: and a6, a6, a4
+; RV64I-NEXT: slli a0, a0, 2
+; RV64I-NEXT: or a0, a6, a0
+; RV64I-NEXT: andi a6, a0, 65
+; RV64I-NEXT: srli a0, a0, 1
+; RV64I-NEXT: slli a6, a6, 1
+; RV64I-NEXT: andi a0, a0, 1104
+; RV64I-NEXT: or a0, a0, a6
+; RV64I-NEXT: addi a2, a2, 1
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: bltu a2, a5, .LBB25_1
+; RV64I-NEXT: # %bb.2: # %bb7
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: test21:
+; RV64ZBB: # %bb.0: # %entry
+; RV64ZBB-NEXT: addi a2, a2, -1
+; RV64ZBB-NEXT: li a3, 256
+; RV64ZBB-NEXT: .LBB25_1: # %bb2
+; RV64ZBB-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64ZBB-NEXT: brev8 a0, a0
+; RV64ZBB-NEXT: andi a0, a0, 1234
+; RV64ZBB-NEXT: addi a2, a2, 1
+; RV64ZBB-NEXT: addw a0, a0, a1
+; RV64ZBB-NEXT: bltu a2, a3, .LBB25_1
+; RV64ZBB-NEXT: # %bb.2: # %bb7
+; RV64ZBB-NEXT: ret
+;
+; NOREMOVAL-LABEL: test21:
+; NOREMOVAL: # %bb.0: # %entry
+; NOREMOVAL-NEXT: addi a2, a2, -1
+; NOREMOVAL-NEXT: li a3, 256
+; NOREMOVAL-NEXT: .LBB25_1: # %bb2
+; NOREMOVAL-NEXT: # =>This Inner Loop Header: Depth=1
+; NOREMOVAL-NEXT: brev8 a0, a0
+; NOREMOVAL-NEXT: andi a0, a0, 1234
+; NOREMOVAL-NEXT: addi a2, a2, 1
+; NOREMOVAL-NEXT: add a0, a0, a1
+; NOREMOVAL-NEXT: bltu a2, a3, .LBB25_1
+; NOREMOVAL-NEXT: # %bb.2: # %bb7
+; NOREMOVAL-NEXT: sext.w a0, a0
+; NOREMOVAL-NEXT: ret
+entry:
+ br label %bb2
+
+bb2: ; preds = %bb2, %entry
+ %i1 = phi i64 [ %arg1, %entry ], [ %i5, %bb2 ]
+ %i2 = phi i64 [ %arg3, %entry ], [ %i3, %bb2 ]
+ %i3 = add i64 %i2, 1
+ %bswap = call i64 @llvm.bswap.i64(i64 %i1)
+ %bitreverse = call i64 @llvm.bitreverse.i64(i64 %bswap)
+ %i4 = and i64 %bitreverse, 1234
+ %i5 = add i64 %i4, %arg2
+ %i6 = icmp ugt i64 %i2, 255
+ br i1 %i6, label %bb7, label %bb2
+
+bb7: ; preds = %bb2
+ %i7 = trunc i64 %i5 to i32
+ ret i32 %i7
+}
+
+; Negative test for looking through brev8. Make sure we consider that it works
+; on bytes.
+define signext i32 @test22(i64 %arg1, i64 %arg2, i64 %arg3) {
+; RV64I-LABEL: test22:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: addi a2, a2, -1
+; RV64I-NEXT: lui a3, %hi(.LCPI26_0)
+; RV64I-NEXT: lui a4, %hi(.LCPI26_1)
+; RV64I-NEXT: lui a5, %hi(.LCPI26_2)
+; RV64I-NEXT: lui a6, %hi(.LCPI26_3)
+; RV64I-NEXT: li a7, 69
+; RV64I-NEXT: ld a3, %lo(.LCPI26_0)(a3)
+; RV64I-NEXT: ld a4, %lo(.LCPI26_1)(a4)
+; RV64I-NEXT: ld a5, %lo(.LCPI26_2)(a5)
+; RV64I-NEXT: ld a6, %lo(.LCPI26_3)(a6)
+; RV64I-NEXT: slli a7, a7, 32
+; RV64I-NEXT: li t0, 65
+; RV64I-NEXT: slli t0, t0, 28
+; RV64I-NEXT: li t1, 256
+; RV64I-NEXT: .LBB26_1: # %bb2
+; RV64I-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT: slli t2, a0, 11
+; RV64I-NEXT: slli a0, a0, 3
+; RV64I-NEXT: and t2, t2, a3
+; RV64I-NEXT: and a0, a0, a4
+; RV64I-NEXT: or a0, a0, t2
+; RV64I-NEXT: srli t2, a0, 2
+; RV64I-NEXT: and a0, a0, a6
+; RV64I-NEXT: and t2, t2, a5
+; RV64I-NEXT: slli a0, a0, 2
+; RV64I-NEXT: or a0, t2, a0
+; RV64I-NEXT: srli t2, a0, 1
+; RV64I-NEXT: and a0, a0, t0
+; RV64I-NEXT: and t2, t2, a7
+; RV64I-NEXT: slli a0, a0, 1
+; RV64I-NEXT: or a0, t2, a0
+; RV64I-NEXT: srli a0, a0, 28
+; RV64I-NEXT: addi a2, a2, 1
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: bltu a2, t1, .LBB26_1
+; RV64I-NEXT: # %bb.2: # %bb7
+; RV64I-NEXT: sext.w a0, a0
+; RV64I-NEXT: ret
+;
+; RV64ZBB-LABEL: test22:
+; RV64ZBB: # %bb.0: # %entry
+; RV64ZBB-NEXT: addi a2, a2, -1
+; RV64ZBB-NEXT: li a3, 256
+; RV64ZBB-NEXT: .LBB26_1: # %bb2
+; RV64ZBB-NEXT: # =>This Inner Loop Header: Depth=1
+; RV64ZBB-NEXT: slli a0, a0, 7
+; RV64ZBB-NEXT: brev8 a0, a0
+; RV64ZBB-NEXT: srli a0, a0, 28
+; RV64ZBB-NEXT: andi a0, a0, 1234
+; RV64ZBB-NEXT: addi a2, a2, 1
+; RV64ZBB-NEXT: add a0, a0, a1
+; RV64ZBB-NEXT: bltu a2, a3, .LBB26_1
+; RV64ZBB-NEXT: # %bb.2: # %bb7
+; RV64ZBB-NEXT: sext.w a0, a0
+; RV64ZBB-NEXT: ret
+;
+; NOREMOVAL-LABEL: test22:
+; NOREMOVAL: # %bb.0: # %entry
+; NOREMOVAL-NEXT: addi a2, a2, -1
+; NOREMOVAL-NEXT: li a3, 256
+; NOREMOVAL-NEXT: .LBB26_1: # %bb2
+; NOREMOVAL-NEXT: # =>This Inner Loop Header: Depth=1
+; NOREMOVAL-NEXT: slli a0, a0, 7
+; NOREMOVAL-NEXT: brev8 a0, a0
+; NOREMOVAL-NEXT: srli a0, a0, 28
+; NOREMOVAL-NEXT: andi a0, a0, 1234
+; NOREMOVAL-NEXT: addi a2, a2, 1
+; NOREMOVAL-NEXT: add a0, a0, a1
+; NOREMOVAL-NEXT: bltu a2, a3, .LBB26_1
+; NOREMOVAL-NEXT: # %bb.2: # %bb7
+; NOREMOVAL-NEXT: sext.w a0, a0
+; NOREMOVAL-NEXT: ret
+entry:
+ br label %bb2
+
+bb2: ; preds = %bb2, %entry
+ %i1 = phi i64 [ %arg1, %entry ], [ %i5, %bb2 ]
+ %i2 = phi i64 [ %arg3, %entry ], [ %i3, %bb2 ]
+ %i3 = add i64 %i2, 1
+ %shl = shl i64 %i1, 7
+ %bswap = call i64 @llvm.bswap.i64(i64 %shl)
+ %bitreverse = call i64 @llvm.bitreverse.i64(i64 %bswap)
+ %lshr = lshr i64 %bitreverse, 28
+ %i4 = and i64 %lshr, 1234
+ %i5 = add i64 %i4, %arg2
+ %i6 = icmp ugt i64 %i2, 255
+ br i1 %i6, label %bb7, label %bb2
+
+bb7: ; preds = %bb2
+ %i7 = trunc i64 %i5 to i32
+ ret i32 %i7
+}
|
preames
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/18/builds/18854 Here is the relevant piece of the build log for the reference |
These were removed in #147830 due to ignoring that these instructions operate on bytes. This patch adds them back with tests, including a test for the byte boundary issue.
I separated out the commits to show the bad optimization if we don't round Bits down to the nearest byte.