Review notes (free text ahead of the first "diff --git" header):

* Purpose: erase SVE store intrinsic calls whose governing predicate is
  zeroinitializer (no active lanes), and add an InstCombine regression test
  covering each handled intrinsic.
* The line structure of this patch was restored from the '+'/' ' markers; the
  added-line counts agree with the hunk headers (10, 26 and 310 lines).
* NOTE(review): template arguments and scalable-vector types (everything of
  the form <...>) were missing from the text this was restored from. They were
  reconstructed from the intrinsic name mangling (e.g. ".nxv16i8") -- confirm
  against the original commit, in particular the <vscale x 1 x i1> predicates
  on the st1dq / st1wq / st1q.scatter.* calls and the exact indentation of the
  context lines.
* NOTE(review): the helper's comment was reworded and the braces around its
  single-statement 'if' were dropped; the added-line count is unchanged, but
  the post-image blob id on the first "index" line predates that edit.
* NOTE(review): test_st1_scatter_index still expects the call to remain --
  aarch64_sve_st1_scatter_index is not among the case labels added here.
  Confirm whether that is intentional.
* NOTE(review): test_st1 expects the call to be erased although
  aarch64_sve_st1 is not among the case labels added here; presumably an
  existing combine outside this patch handles it -- confirm.

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index c0abbd32eeec4..eb60b966c8e02 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -985,6 +985,16 @@ static bool isAllActivePredicate(Value *Pred) {
                          m_ConstantInt<AArch64SVEPredPattern::all>()));
 }
 
+// Erase a store-like intrinsic call when the predicate found at operand index
+// PredPos has all inactive lanes (is a zero constant)
+static std::optional<Instruction *>
+instCombineSVENoActiveUnaryErase(InstCombiner &IC, IntrinsicInst &II,
+                                 int PredPos) {
+  if (match(II.getOperand(PredPos), m_ZeroInt()))
+    return IC.eraseInstFromFunction(II);
+  return std::nullopt;
+}
+
 // Simplify unary operation where predicate has all inactive lanes by replacing
 // instruction with zeroed object
 static std::optional<Instruction *>
@@ -2007,6 +2017,32 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
   default:
     break;
 
+  case Intrinsic::aarch64_sve_st1_scatter:
+  case Intrinsic::aarch64_sve_st1_scatter_scalar_offset:
+  case Intrinsic::aarch64_sve_st1_scatter_sxtw:
+  case Intrinsic::aarch64_sve_st1_scatter_sxtw_index:
+  case Intrinsic::aarch64_sve_st1_scatter_uxtw:
+  case Intrinsic::aarch64_sve_st1_scatter_uxtw_index:
+  case Intrinsic::aarch64_sve_st1dq:
+  case Intrinsic::aarch64_sve_st1q_scatter_index:
+  case Intrinsic::aarch64_sve_st1q_scatter_scalar_offset:
+  case Intrinsic::aarch64_sve_st1q_scatter_vector_offset:
+  case Intrinsic::aarch64_sve_st1wq:
+  case Intrinsic::aarch64_sve_stnt1:
+  case Intrinsic::aarch64_sve_stnt1_scatter:
+  case Intrinsic::aarch64_sve_stnt1_scatter_index:
+  case Intrinsic::aarch64_sve_stnt1_scatter_scalar_offset:
+  case Intrinsic::aarch64_sve_stnt1_scatter_uxtw:
+    return instCombineSVENoActiveUnaryErase(IC, II, 1);
+  case Intrinsic::aarch64_sve_st2:
+  case Intrinsic::aarch64_sve_st2q:
+    return instCombineSVENoActiveUnaryErase(IC, II, 2);
+  case Intrinsic::aarch64_sve_st3:
+  case Intrinsic::aarch64_sve_st3q:
+    return instCombineSVENoActiveUnaryErase(IC, II, 3);
+  case Intrinsic::aarch64_sve_st4:
+  case Intrinsic::aarch64_sve_st4q:
+    return instCombineSVENoActiveUnaryErase(IC, II, 4);
   case Intrinsic::aarch64_sve_ld1_gather:
   case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
   case Intrinsic::aarch64_sve_ld1_gather_sxtw:
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-stores.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-stores.ll
new file mode 100644
index 0000000000000..5908dd1b46d72
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-stores.ll
@@ -0,0 +1,310 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+;RUN: opt -S -passes=instcombine < %s | FileCheck %s
+target triple = "aarch64-unknown-linux-gnu"
+
+define void @test_st1(ptr %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: define void @test_st1(
+; CHECK-SAME: ptr [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  call void @llvm.aarch64.sve.st1.nxv16i8(<vscale x 16 x i8> %b, <vscale x 16 x i1> zeroinitializer, ptr %a)
+  ret void
+}
+
+define void @test_st1_scatter(<vscale x 2 x i16> %data_trunc, ptr %base, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: define void @test_st1_scatter(
+; CHECK-SAME: <vscale x 2 x i16> [[DATA_TRUNC:%.*]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[B:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  call void @llvm.aarch64.sve.st1.scatter.nxv2i16(<vscale x 2 x i16> %data_trunc,
+                                                  <vscale x 2 x i1> zeroinitializer,
+                                                  ptr %base,
+                                                  <vscale x 2 x i64> %b)
+  ret void
+}
+
+define void @test_st1_scatter_index(<vscale x 2 x i32> %data_trunc, ptr %base, <vscale x 2 x i64> %offsets) {
+; CHECK-LABEL: define void @test_st1_scatter_index(
+; CHECK-SAME: <vscale x 2 x i32> [[DATA_TRUNC:%.*]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    call void @llvm.aarch64.sve.st1.scatter.index.nxv2i32(<vscale x 2 x i32> [[DATA_TRUNC]], <vscale x 2 x i1> zeroinitializer, ptr [[BASE]], <vscale x 2 x i64> [[OFFSETS]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  call void @llvm.aarch64.sve.st1.scatter.index.nxv2i32(<vscale x 2 x i32> %data_trunc,
+                                                        <vscale x 2 x i1> zeroinitializer,
+                                                        ptr %base,
+                                                        <vscale x 2 x i64> %offsets)
+  ret void
+}
+
+define void @test_st1_scatter_scalar_offset(<vscale x 4 x i8> %data_trunc, <vscale x 4 x i32> %base) {
+; CHECK-LABEL: define void @test_st1_scatter_scalar_offset(
+; CHECK-SAME: <vscale x 4 x i8> [[DATA_TRUNC:%.*]], <vscale x 4 x i32> [[BASE:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  call void @llvm.aarch64.sve.st1.scatter.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i8> %data_trunc,
+                                                                       <vscale x 4 x i1> zeroinitializer,
+                                                                       <vscale x 4 x i32> %base,
+                                                                       i64 16)
+  ret void
+}
+
+define void @test_st1_scatter_sxtw(<vscale x 4 x i8> %data_trunc, ptr %base, <vscale x 4 x i32> %offsets) {
+; CHECK-LABEL: define void @test_st1_scatter_sxtw(
+; CHECK-SAME: <vscale x 4 x i8> [[DATA_TRUNC:%.*]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.aarch64.sve.st1.scatter.sxtw.nxv4i8(<vscale x 4 x i8> %data_trunc,
+                                                      <vscale x 4 x i1> zeroinitializer,
+                                                      ptr %base,
+                                                      <vscale x 4 x i32> %offsets)
+  ret void
+}
+
+define void @test_st1_scatter_sxtw_index(<vscale x 4 x i16> %data_trunc, ptr %base, <vscale x 4 x i32> %indices) {
+; CHECK-LABEL: define void @test_st1_scatter_sxtw_index(
+; CHECK-SAME: <vscale x 4 x i16> [[DATA_TRUNC:%.*]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.aarch64.sve.st1.scatter.sxtw.index.nxv4i16(<vscale x 4 x i16> %data_trunc,
+                                                             <vscale x 4 x i1> zeroinitializer,
+                                                             ptr %base,
+                                                             <vscale x 4 x i32> %indices)
+  ret void
+}
+
+define void @test_st1_scatter_uxtw(<vscale x 4 x i8> %data_trunc, ptr %base, <vscale x 4 x i32> %offsets) {
+; CHECK-LABEL: define void @test_st1_scatter_uxtw(
+; CHECK-SAME: <vscale x 4 x i8> [[DATA_TRUNC:%.*]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.aarch64.sve.st1.scatter.uxtw.nxv4i8(<vscale x 4 x i8> %data_trunc,
+                                                      <vscale x 4 x i1> zeroinitializer,
+                                                      ptr %base,
+                                                      <vscale x 4 x i32> %offsets)
+  ret void
+}
+
+define void @test_st1_scatter_uxtw_index(<vscale x 4 x i16> %data_trunc, ptr %base, <vscale x 4 x i32> %indices) {
+; CHECK-LABEL: define void @test_st1_scatter_uxtw_index(
+; CHECK-SAME: <vscale x 4 x i16> [[DATA_TRUNC:%.*]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[INDICES:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.aarch64.sve.st1.scatter.uxtw.index.nxv4i16(<vscale x 4 x i16> %data_trunc,
+                                                             <vscale x 4 x i1> zeroinitializer,
+                                                             ptr %base,
+                                                             <vscale x 4 x i32> %indices)
+  ret void
+}
+
+define void @test_st1dq(<vscale x 2 x i64> %zt, ptr %gep1) {
+; CHECK-LABEL: define void @test_st1dq(
+; CHECK-SAME: <vscale x 2 x i64> [[ZT:%.*]], ptr [[GEP1:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  call void @llvm.aarch64.sve.st1dq.nxv2i64(<vscale x 2 x i64> %zt, <vscale x 1 x i1> zeroinitializer, ptr %gep1)
+  ret void
+}
+
+define void @test_st1q_scatter_index(<vscale x 8 x i16> %data, <vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) {
+; CHECK-LABEL: define void @test_st1q_scatter_index(
+; CHECK-SAME: <vscale x 8 x i16> [[DATA:%.*]], <vscale x 1 x i1> [[PG:%.*]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[IDX:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  call void @llvm.aarch64.sve.st1q.scatter.index.nxv8i16(<vscale x 8 x i16> %data, <vscale x 1 x i1> zeroinitializer, ptr %base, <vscale x 2 x i64> %idx)
+  ret void
+}
+
+define void @test_st1q_scatter_scalar_offset(<vscale x 2 x i64> %data, <vscale x 2 x i64> %base) {
+; CHECK-LABEL: define void @test_st1q_scatter_scalar_offset(
+; CHECK-SAME: <vscale x 2 x i64> [[DATA:%.*]], <vscale x 2 x i64> [[BASE:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  call void @llvm.aarch64.sve.st1q.scatter.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i64> %data, <vscale x 1 x i1> zeroinitializer, <vscale x 2 x i64> %base, i64 0)
+  ret void
+}
+
+define void @test_st1q_scatter_vector_offset(<vscale x 8 x i16> %data, ptr %base, <vscale x 2 x i64> %off) {
+; CHECK-LABEL: define void @test_st1q_scatter_vector_offset(
+; CHECK-SAME: <vscale x 8 x i16> [[DATA:%.*]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFF:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  call void @llvm.aarch64.sve.st1q.scatter.vector.offset.nxv8i16(<vscale x 8 x i16> %data, <vscale x 1 x i1> zeroinitializer, ptr %base, <vscale x 2 x i64> %off)
+  ret void
+}
+
+define void @test_st1wq(ptr %a, <vscale x 4 x i32> %b) {
+; CHECK-LABEL: define void @test_st1wq(
+; CHECK-SAME: ptr [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  call void @llvm.aarch64.sve.st1wq.nxv4i32(<vscale x 4 x i32> %b, <vscale x 1 x i1> zeroinitializer, ptr %a)
+  ret void
+}
+
+
+define void @test_st2(ptr %a, <vscale x 8 x i32> %b) {
+; CHECK-LABEL: define void @test_st2(
+; CHECK-SAME: ptr [[A:%.*]], <vscale x 8 x i32> [[B:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> %b, i64 0)
+  %1 = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> %b, i64 4)
+  tail call void @llvm.aarch64.sve.st2.nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> zeroinitializer, ptr %a)
+  ret void
+}
+
+define void @test_st2q(ptr %a, <vscale x 8 x i32> %b) {
+; CHECK-LABEL: define void @test_st2q(
+; CHECK-SAME: ptr [[A:%.*]], <vscale x 8 x i32> [[B:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> %b, i64 0)
+  %1 = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> %b, i64 4)
+  tail call void @llvm.aarch64.sve.st2q.nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i1> zeroinitializer, ptr %a)
+  ret void
+}
+
+define void @test_st3(ptr %a, <vscale x 12 x i32> %b) {
+; CHECK-LABEL: define void @test_st3(
+; CHECK-SAME: ptr [[A:%.*]], <vscale x 12 x i32> [[B:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> %b, i64 0)
+  %1 = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> %b, i64 4)
+  %2 = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> %b, i64 8)
+  tail call void @llvm.aarch64.sve.st3.nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i1> zeroinitializer, ptr %a)
+  ret void
+}
+
+define void @test_st3q(ptr %a, <vscale x 12 x i32> %b) {
+; CHECK-LABEL: define void @test_st3q(
+; CHECK-SAME: ptr [[A:%.*]], <vscale x 12 x i32> [[B:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> %b, i64 0)
+  %1 = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> %b, i64 4)
+  %2 = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv12i32(<vscale x 12 x i32> %b, i64 8)
+  tail call void @llvm.aarch64.sve.st3q.nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i1> zeroinitializer, ptr %a)
+  ret void
+}
+
+define void @test_st4(ptr %a, <vscale x 16 x i32> %b) {
+; CHECK-LABEL: define void @test_st4(
+; CHECK-SAME: ptr [[A:%.*]], <vscale x 16 x i32> [[B:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %b, i64 0)
+  %1 = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %b, i64 4)
+  %2 = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %b, i64 8)
+  %3 = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %b, i64 12)
+  tail call void @llvm.aarch64.sve.st4.nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, <vscale x 4 x i1> zeroinitializer, ptr %a)
+  ret void
+}
+
+define void @test_st4q(ptr %a, <vscale x 16 x i32> %b) {
+; CHECK-LABEL: define void @test_st4q(
+; CHECK-SAME: ptr [[A:%.*]], <vscale x 16 x i32> [[B:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  %0 = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %b, i64 0)
+  %1 = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %b, i64 4)
+  %2 = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %b, i64 8)
+  %3 = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> %b, i64 12)
+  tail call void @llvm.aarch64.sve.st4q.nxv4i32(<vscale x 4 x i32> %0, <vscale x 4 x i32> %1, <vscale x 4 x i32> %2, <vscale x 4 x i32> %3, <vscale x 4 x i1> zeroinitializer, ptr %a)
+  ret void
+}
+
+define void @test_stnt1(ptr %a, <vscale x 16 x i8> %b) {
+; CHECK-LABEL: define void @test_stnt1(
+; CHECK-SAME: ptr [[A:%.*]], <vscale x 16 x i8> [[B:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> %b, <vscale x 16 x i1> zeroinitializer, ptr %a)
+  ret void
+}
+
+define void @test_stnt1_scatter(<vscale x 2 x i16> %data_trunc, ptr %base, <vscale x 2 x i64> %b) {
+; CHECK-LABEL: define void @test_stnt1_scatter(
+; CHECK-SAME: <vscale x 2 x i16> [[DATA_TRUNC:%.*]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[B:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  call void @llvm.aarch64.sve.stnt1.scatter.nxv2i16(<vscale x 2 x i16> %data_trunc,
+                                                    <vscale x 2 x i1> zeroinitializer,
+                                                    ptr %base,
+                                                    <vscale x 2 x i64> %b)
+  ret void
+}
+
+define void @test_stnt1_scatter_index(<vscale x 2 x i32> %data_trunc, ptr %base, <vscale x 2 x i64> %offsets) {
+; CHECK-LABEL: define void @test_stnt1_scatter_index(
+; CHECK-SAME: <vscale x 2 x i32> [[DATA_TRUNC:%.*]], ptr [[BASE:%.*]], <vscale x 2 x i64> [[OFFSETS:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  call void @llvm.aarch64.sve.stnt1.scatter.index.nxv2i32(<vscale x 2 x i32> %data_trunc,
+                                                          <vscale x 2 x i1> zeroinitializer,
+                                                          ptr %base,
+                                                          <vscale x 2 x i64> %offsets)
+  ret void
+}
+
+define void @test_stnt1_scatter_scalar_offset(<vscale x 4 x i8> %data_trunc, <vscale x 4 x i32> %base) {
+; CHECK-LABEL: define void @test_stnt1_scatter_scalar_offset(
+; CHECK-SAME: <vscale x 4 x i8> [[DATA_TRUNC:%.*]], <vscale x 4 x i32> [[BASE:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    ret void
+;
+entry:
+  call void @llvm.aarch64.sve.stnt1.scatter.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i8> %data_trunc,
+                                                                         <vscale x 4 x i1> zeroinitializer,
+                                                                         <vscale x 4 x i32> %base,
+                                                                         i64 16)
+  ret void
+}
+
+define void @test_stnt1_scatter_uxtw(<vscale x 4 x i8> %data_trunc, ptr %base, <vscale x 4 x i32> %offsets) {
+; CHECK-LABEL: define void @test_stnt1_scatter_uxtw(
+; CHECK-SAME: <vscale x 4 x i8> [[DATA_TRUNC:%.*]], ptr [[BASE:%.*]], <vscale x 4 x i32> [[OFFSETS:%.*]]) {
+; CHECK-NEXT:    ret void
+;
+  call void @llvm.aarch64.sve.stnt1.scatter.uxtw.nxv4i8(<vscale x 4 x i8> %data_trunc,
+                                                        <vscale x 4 x i1> zeroinitializer,
+                                                        ptr %base,
+                                                        <vscale x 4 x i32> %offsets)
+  ret void
+}