diff --git a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
index 24c05a2f807d0..d257f56cf4129 100644
--- a/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
+++ b/llvm/lib/Target/RISCV/RISCVOptWInstrs.cpp
@@ -323,6 +323,12 @@ static bool hasAllNBitUsers(const MachineInstr &OrigMI,
     Worklist.push_back(std::make_pair(UserMI, Bits));
     break;
 
+  case RISCV::BREV8:
+  case RISCV::ORC_B:
+    // BREV8 and ORC_B work on bytes. Round Bits down to the nearest byte.
+    Worklist.push_back(std::make_pair(UserMI, alignDown(Bits, 8)));
+    break;
+
   case RISCV::PseudoCCMOVGPR:
   case RISCV::PseudoCCMOVGPRNoX0:
     // Either operand 4 or operand 5 is returned by this instruction. If
diff --git a/llvm/test/CodeGen/RISCV/sextw-removal.ll b/llvm/test/CodeGen/RISCV/sextw-removal.ll
index 1a978d1a0fcac..9c8230572b926 100644
--- a/llvm/test/CodeGen/RISCV/sextw-removal.ll
+++ b/llvm/test/CodeGen/RISCV/sextw-removal.ll
@@ -1,9 +1,9 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+f,+zknh,+v -target-abi=lp64f \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64I
-; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zba,+zbb,+f,+zknh,+v -target-abi=lp64f \
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zba,+zbb,+zbkb,+f,+zknh,+v -target-abi=lp64f \
 ; RUN:   | FileCheck %s --check-prefixes=CHECK,RV64ZBB
-; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zba,+zbb,+f,+zknh,+v -target-abi=lp64f \
+; RUN: llc < %s -mtriple=riscv64 -mattr=+m,+zba,+zbb,+zbkb,+f,+zknh,+v -target-abi=lp64f \
 ; RUN:   -riscv-disable-sextw-removal | FileCheck %s --check-prefix=NOREMOVAL
 
 define void @test1(i32 signext %arg, i32 signext %arg1) nounwind {
@@ -1499,3 +1499,186 @@ bb7:                                              ; preds = %bb2
 }
 
 declare i32 @llvm.riscv.vmv.x.s.nxv1i32(<vscale x 1 x i32>)
+
+; Test that we can look through brev8 in hasAllNBitUsers.
+define signext i32 @test21(i64 %arg1, i64 %arg2, i64 %arg3) {
+; RV64I-LABEL: test21:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    addi a2, a2, -1
+; RV64I-NEXT:    lui a3, 61681
+; RV64I-NEXT:    lui a4, 209715
+; RV64I-NEXT:    addi a3, a3, -241
+; RV64I-NEXT:    addi a4, a4, 819
+; RV64I-NEXT:    slli a5, a3, 32
+; RV64I-NEXT:    add a3, a3, a5
+; RV64I-NEXT:    slli a5, a4, 32
+; RV64I-NEXT:    add a4, a4, a5
+; RV64I-NEXT:    li a5, 256
+; RV64I-NEXT:  .LBB25_1: # %bb2
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    srli a6, a0, 4
+; RV64I-NEXT:    and a0, a0, a3
+; RV64I-NEXT:    and a6, a6, a3
+; RV64I-NEXT:    slli a0, a0, 4
+; RV64I-NEXT:    or a0, a6, a0
+; RV64I-NEXT:    srli a6, a0, 2
+; RV64I-NEXT:    and a0, a0, a4
+; RV64I-NEXT:    and a6, a6, a4
+; RV64I-NEXT:    slli a0, a0, 2
+; RV64I-NEXT:    or a0, a6, a0
+; RV64I-NEXT:    andi a6, a0, 65
+; RV64I-NEXT:    srli a0, a0, 1
+; RV64I-NEXT:    slli a6, a6, 1
+; RV64I-NEXT:    andi a0, a0, 1104
+; RV64I-NEXT:    or a0, a0, a6
+; RV64I-NEXT:    addi a2, a2, 1
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    bltu a2, a5, .LBB25_1
+; RV64I-NEXT:  # %bb.2: # %bb7
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: test21:
+; RV64ZBB:       # %bb.0: # %entry
+; RV64ZBB-NEXT:    addi a2, a2, -1
+; RV64ZBB-NEXT:    li a3, 256
+; RV64ZBB-NEXT:  .LBB25_1: # %bb2
+; RV64ZBB-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64ZBB-NEXT:    brev8 a0, a0
+; RV64ZBB-NEXT:    andi a0, a0, 1234
+; RV64ZBB-NEXT:    addi a2, a2, 1
+; RV64ZBB-NEXT:    addw a0, a0, a1
+; RV64ZBB-NEXT:    bltu a2, a3, .LBB25_1
+; RV64ZBB-NEXT:  # %bb.2: # %bb7
+; RV64ZBB-NEXT:    ret
+;
+; NOREMOVAL-LABEL: test21:
+; NOREMOVAL:       # %bb.0: # %entry
+; NOREMOVAL-NEXT:    addi a2, a2, -1
+; NOREMOVAL-NEXT:    li a3, 256
+; NOREMOVAL-NEXT:  .LBB25_1: # %bb2
+; NOREMOVAL-NEXT:    # =>This Inner Loop Header: Depth=1
+; NOREMOVAL-NEXT:    brev8 a0, a0
+; NOREMOVAL-NEXT:    andi a0, a0, 1234
+; NOREMOVAL-NEXT:    addi a2, a2, 1
+; NOREMOVAL-NEXT:    add a0, a0, a1
+; NOREMOVAL-NEXT:    bltu a2, a3, .LBB25_1
+; NOREMOVAL-NEXT:  # %bb.2: # %bb7
+; NOREMOVAL-NEXT:    sext.w a0, a0
+; NOREMOVAL-NEXT:    ret
+entry:
+  br label %bb2
+
+bb2:                                              ; preds = %bb2, %entry
+  %i1 = phi i64 [ %arg1, %entry ], [ %i5, %bb2 ]
+  %i2 = phi i64 [ %arg3, %entry ], [ %i3, %bb2 ]
+  %i3 = add i64 %i2, 1
+  %bswap = call i64 @llvm.bswap.i64(i64 %i1)
+  %bitreverse = call i64 @llvm.bitreverse.i64(i64 %bswap)
+  %i4 = and i64 %bitreverse, 1234
+  %i5 = add i64 %i4, %arg2
+  %i6 = icmp ugt i64 %i2, 255
+  br i1 %i6, label %bb7, label %bb2
+
+bb7:                                              ; preds = %bb2
+  %i7 = trunc i64 %i5 to i32
+  ret i32 %i7
+}
+
+; Negative test for looking through brev8. Make sure we consider that it works
+; on bytes.
+define signext i32 @test22(i64 %arg1, i64 %arg2, i64 %arg3) {
+; RV64I-LABEL: test22:
+; RV64I:       # %bb.0: # %entry
+; RV64I-NEXT:    addi a2, a2, -1
+; RV64I-NEXT:    lui a3, %hi(.LCPI26_0)
+; RV64I-NEXT:    lui a4, %hi(.LCPI26_1)
+; RV64I-NEXT:    lui a5, %hi(.LCPI26_2)
+; RV64I-NEXT:    lui a6, %hi(.LCPI26_3)
+; RV64I-NEXT:    li a7, 69
+; RV64I-NEXT:    ld a3, %lo(.LCPI26_0)(a3)
+; RV64I-NEXT:    ld a4, %lo(.LCPI26_1)(a4)
+; RV64I-NEXT:    ld a5, %lo(.LCPI26_2)(a5)
+; RV64I-NEXT:    ld a6, %lo(.LCPI26_3)(a6)
+; RV64I-NEXT:    slli a7, a7, 32
+; RV64I-NEXT:    li t0, 65
+; RV64I-NEXT:    slli t0, t0, 28
+; RV64I-NEXT:    li t1, 256
+; RV64I-NEXT:  .LBB26_1: # %bb2
+; RV64I-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64I-NEXT:    slli t2, a0, 11
+; RV64I-NEXT:    slli a0, a0, 3
+; RV64I-NEXT:    and t2, t2, a3
+; RV64I-NEXT:    and a0, a0, a4
+; RV64I-NEXT:    or a0, a0, t2
+; RV64I-NEXT:    srli t2, a0, 2
+; RV64I-NEXT:    and a0, a0, a6
+; RV64I-NEXT:    and t2, t2, a5
+; RV64I-NEXT:    slli a0, a0, 2
+; RV64I-NEXT:    or a0, t2, a0
+; RV64I-NEXT:    srli t2, a0, 1
+; RV64I-NEXT:    and a0, a0, t0
+; RV64I-NEXT:    and t2, t2, a7
+; RV64I-NEXT:    slli a0, a0, 1
+; RV64I-NEXT:    or a0, t2, a0
+; RV64I-NEXT:    srli a0, a0, 28
+; RV64I-NEXT:    addi a2, a2, 1
+; RV64I-NEXT:    add a0, a0, a1
+; RV64I-NEXT:    bltu a2, t1, .LBB26_1
+; RV64I-NEXT:  # %bb.2: # %bb7
+; RV64I-NEXT:    sext.w a0, a0
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: test22:
+; RV64ZBB:       # %bb.0: # %entry
+; RV64ZBB-NEXT:    addi a2, a2, -1
+; RV64ZBB-NEXT:    li a3, 256
+; RV64ZBB-NEXT:  .LBB26_1: # %bb2
+; RV64ZBB-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64ZBB-NEXT:    slli a0, a0, 7
+; RV64ZBB-NEXT:    brev8 a0, a0
+; RV64ZBB-NEXT:    srli a0, a0, 28
+; RV64ZBB-NEXT:    andi a0, a0, 1234
+; RV64ZBB-NEXT:    addi a2, a2, 1
+; RV64ZBB-NEXT:    add a0, a0, a1
+; RV64ZBB-NEXT:    bltu a2, a3, .LBB26_1
+; RV64ZBB-NEXT:  # %bb.2: # %bb7
+; RV64ZBB-NEXT:    sext.w a0, a0
+; RV64ZBB-NEXT:    ret
+;
+; NOREMOVAL-LABEL: test22:
+; NOREMOVAL:       # %bb.0: # %entry
+; NOREMOVAL-NEXT:    addi a2, a2, -1
+; NOREMOVAL-NEXT:    li a3, 256
+; NOREMOVAL-NEXT:  .LBB26_1: # %bb2
+; NOREMOVAL-NEXT:    # =>This Inner Loop Header: Depth=1
+; NOREMOVAL-NEXT:    slli a0, a0, 7
+; NOREMOVAL-NEXT:    brev8 a0, a0
+; NOREMOVAL-NEXT:    srli a0, a0, 28
+; NOREMOVAL-NEXT:    andi a0, a0, 1234
+; NOREMOVAL-NEXT:    addi a2, a2, 1
+; NOREMOVAL-NEXT:    add a0, a0, a1
+; NOREMOVAL-NEXT:    bltu a2, a3, .LBB26_1
+; NOREMOVAL-NEXT:  # %bb.2: # %bb7
+; NOREMOVAL-NEXT:    sext.w a0, a0
+; NOREMOVAL-NEXT:    ret
+entry:
+  br label %bb2
+
+bb2:                                              ; preds = %bb2, %entry
+  %i1 = phi i64 [ %arg1, %entry ], [ %i5, %bb2 ]
+  %i2 = phi i64 [ %arg3, %entry ], [ %i3, %bb2 ]
+  %i3 = add i64 %i2, 1
+  %shl = shl i64 %i1, 7
+  %bswap = call i64 @llvm.bswap.i64(i64 %shl)
+  %bitreverse = call i64 @llvm.bitreverse.i64(i64 %bswap)
+  %lshr = lshr i64 %bitreverse, 28
+  %i4 = and i64 %lshr, 1234
+  %i5 = add i64 %i4, %arg2
+  %i6 = icmp ugt i64 %i2, 255
+  br i1 %i6, label %bb7, label %bb2
+
+bb7:                                              ; preds = %bb2
+  %i7 = trunc i64 %i5 to i32
+  ret i32 %i7
+}
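
Note on the RISCVOptWInstrs.cpp change above: BREV8 reverses the bits within each byte and ORC_B ORs each byte's bits into every bit of that byte, so a result bit may depend on any bit of the same input byte but never on another byte. When hasAllNBitUsers only needs the low Bits bits of the original value, it can therefore only look through these instructions for the whole bytes contained in that range, which is why the patch propagates alignDown(Bits, 8); test22 exercises the case where Bits is not byte-aligned. The standalone C++ sketch below is not part of the patch and uses hypothetical helper names (brev8_ref, lowBits); it just checks that property for BREV8 on randomized inputs.

// Illustrative sketch only: the low alignDown(Bits, 8) bits of brev8(x)
// depend only on the low Bits bits of x.
#include <cassert>
#include <cstdint>
#include <random>

// Reference semantics: BREV8 reverses the bits within each byte independently.
static uint64_t brev8_ref(uint64_t X) {
  uint64_t R = 0;
  for (int Byte = 0; Byte < 8; ++Byte)
    for (int Bit = 0; Bit < 8; ++Bit)
      if (X & (1ULL << (8 * Byte + Bit)))
        R |= 1ULL << (8 * Byte + (7 - Bit));
  return R;
}

// Keep only the low N bits (N may be 0..64).
static uint64_t lowBits(uint64_t X, unsigned N) {
  return N >= 64 ? X : X & ((1ULL << N) - 1);
}

int main() {
  std::mt19937_64 Rng(0);
  for (int I = 0; I < 10000; ++I) {
    uint64_t X = Rng();
    uint64_t Noise = Rng();
    for (unsigned Bits = 0; Bits <= 64; ++Bits) {
      unsigned Aligned = Bits & ~7u; // alignDown(Bits, 8)
      // Corrupt everything above the low Bits bits of the BREV8 input.
      uint64_t Corrupted = lowBits(X, Bits) | (Noise & ~lowBits(~0ULL, Bits));
      // The low alignDown(Bits, 8) bits of the result must be unaffected,
      // i.e. they depend only on the low Bits bits of the input.
      assert(lowBits(brev8_ref(X), Aligned) ==
             lowBits(brev8_ref(Corrupted), Aligned));
    }
  }
  return 0;
}

The same byte-locality argument applies to ORC_B, so rounding the demanded width down to a byte boundary is conservative for both instructions.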