From cdb6decdf1f6d1ff4a7a553810dc07f93f1c7956 Mon Sep 17 00:00:00 2001
From: Marian Lukac
Date: Wed, 4 Sep 2024 16:19:32 +0000
Subject: [PATCH] [AArch64] replace SVE intrinsics with no active lanes with constant

---
 .../AArch64/AArch64TargetTransformInfo.cpp    |  51 ++++++
 ...rinsic-comb-no-active-lanes-to-constant.ll | 158 ++++++++++++++++++
 2 files changed, 209 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-to-constant.ll

diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index a782c9c435123..beb8088990967 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -1073,6 +1073,18 @@ static bool isAllActivePredicate(Value *Pred) {
                          m_ConstantInt()));
 }
 
+// Simplify an operation whose governing predicate (operand 0) has no active
+// lanes by replacing all of its uses with NewVal and erasing the instruction.
+static std::optional<Instruction *>
+instCombineSVENoActiveLanesConstant(InstCombiner &IC, IntrinsicInst &II,
+                                    Constant *NewVal) {
+  if (match(II.getOperand(0), m_ZeroInt())) {
+    IC.replaceInstUsesWith(II, NewVal);
+    return IC.eraseInstFromFunction(II);
+  }
+  return std::nullopt;
+}
+
 // Erase unary operation where predicate has all inactive lanes
 static std::optional<Instruction *>
 instCombineSVENoActiveUnaryErase(InstCombiner &IC, IntrinsicInst &II,
@@ -2131,6 +2143,45 @@ AArch64TTIImpl::instCombineIntrinsic(InstCombiner &IC,
   case Intrinsic::aarch64_sve_st4:
   case Intrinsic::aarch64_sve_st4q:
     return instCombineSVENoActiveUnaryErase(IC, II, 4);
+  case Intrinsic::aarch64_sve_andqv:
+  case Intrinsic::aarch64_sve_andv:
+    return instCombineSVENoActiveLanesConstant(
+        IC, II, ConstantInt::getAllOnesValue(II.getType()));
+  case Intrinsic::aarch64_sve_fmaxnmqv:
+  case Intrinsic::aarch64_sve_fmaxnmv:
+  case Intrinsic::aarch64_sve_fminnmqv:
+  case Intrinsic::aarch64_sve_fminnmv:
+    return instCombineSVENoActiveLanesConstant(
+        IC, II, ConstantFP::getNaN(II.getType()));
+  case Intrinsic::aarch64_sve_fmaxqv:
+  case Intrinsic::aarch64_sve_fmaxv:
+    return instCombineSVENoActiveLanesConstant(
+        IC, II, ConstantFP::getInfinity(II.getType(), true));
+  case Intrinsic::aarch64_sve_fminqv:
+  case Intrinsic::aarch64_sve_fminv:
+    return instCombineSVENoActiveLanesConstant(
+        IC, II, ConstantFP::getInfinity(II.getType()));
+  case Intrinsic::aarch64_sve_smaxv:
+  case Intrinsic::aarch64_sve_smaxqv: {
+    auto *RetTy = II.getType();
+    auto *MinSInt = ConstantInt::get(
+        RetTy, APInt::getSignedMinValue(RetTy->getScalarSizeInBits()));
+    return instCombineSVENoActiveLanesConstant(IC, II, MinSInt);
+  }
+  case Intrinsic::aarch64_sve_sminv:
+  case Intrinsic::aarch64_sve_sminqv: {
+    auto *RetTy = II.getType();
+    auto *MaxSInt = ConstantInt::get(
+        RetTy, APInt::getSignedMaxValue(RetTy->getScalarSizeInBits()));
+    return instCombineSVENoActiveLanesConstant(IC, II, MaxSInt);
+  }
+  case Intrinsic::aarch64_sve_uminv:
+  case Intrinsic::aarch64_sve_uminqv: {
+    auto *RetTy = II.getType();
+    auto *MaxUInt = ConstantInt::get(
+        RetTy, APInt::getMaxValue(RetTy->getScalarSizeInBits()));
+    return instCombineSVENoActiveLanesConstant(IC, II, MaxUInt);
+  }
   case Intrinsic::aarch64_sve_ld1_gather:
   case Intrinsic::aarch64_sve_ld1_gather_scalar_offset:
   case Intrinsic::aarch64_sve_ld1_gather_sxtw:
diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-to-constant.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-to-constant.ll
new file mode 100644
index 0000000000000..9755582845999
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes-to-constant.ll
@@ -0,0 +1,158 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+;RUN: opt -S -passes=instcombine < %s | FileCheck %s
+target triple = "aarch64-unknown-linux-gnu"
+
+define <16 x i8> @andqv_i8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: define <16 x i8> @andqv_i8(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]]) {
+; CHECK-NEXT:    ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+;
+  %res = call <16 x i8> @llvm.aarch64.sve.andqv.v16i8.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a);
+  ret <16 x i8> %res
+}
+
+define i8 @andv_i8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: define i8 @andv_i8(
+; CHECK-SAME: <vscale x 16 x i8> [[A:%.*]]) {
+; CHECK-NEXT:    ret i8 -1
+;
+  %res = call i8 @llvm.aarch64.sve.andv.v16i8.nxv16i8(<vscale x 16 x i1> zeroinitializer, <vscale x 16 x i8> %a);
+  ret i8 %res
+}
+
+define <4 x float> @fmaxnmqv_f32(<vscale x 4 x float> %a) {
+; CHECK-LABEL: define <4 x float> @fmaxnmqv_f32(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]]) {
+; CHECK-NEXT:    ret <4 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000, float 0x7FF8000000000000, float 0x7FF8000000000000>
+;
+  %res = call <4 x float> @llvm.aarch64.sve.fmaxnmqv.v4f32.nxv4f32(<vscale x 4 x i1> zeroinitializer,
+      <vscale x 4 x float> %a)
+  ret <4 x float> %res
+}
+
+define float @fmaxnmv_f32(<vscale x 4 x float> %a) {
+; CHECK-LABEL: define float @fmaxnmv_f32(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]]) {
+; CHECK-NEXT:    ret float 0x7FF8000000000000
+;
+  %res = call float @llvm.aarch64.sve.fmaxnmv.nxv4f32(<vscale x 4 x i1> zeroinitializer,
+      <vscale x 4 x float> %a)
+  ret float %res
+}
+
+define <4 x float> @fminnmqv_f32(<vscale x 4 x float> %a) {
+; CHECK-LABEL: define <4 x float> @fminnmqv_f32(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]]) {
+; CHECK-NEXT:    ret <4 x float> <float 0x7FF8000000000000, float 0x7FF8000000000000, float 0x7FF8000000000000, float 0x7FF8000000000000>
+;
+  %res = call <4 x float> @llvm.aarch64.sve.fminnmqv.v4f32.nxv4f32(<vscale x 4 x i1> zeroinitializer,
+      <vscale x 4 x float> %a)
+  ret <4 x float> %res
+}
+
+define float @fminnmv_f32(<vscale x 4 x float> %a) {
+; CHECK-LABEL: define float @fminnmv_f32(
+; CHECK-SAME: <vscale x 4 x float> [[A:%.*]]) {
+; CHECK-NEXT:    ret float 0x7FF8000000000000
+;
+  %res = call float @llvm.aarch64.sve.fminnmv.nxv4f32(<vscale x 4 x i1> zeroinitializer,
+      <vscale x 4 x float> %a)
+  ret float %res
+}
+
+define <2 x double> @fmaxqv_f64(<vscale x 2 x double> %a) {
+; CHECK-LABEL: define <2 x double> @fmaxqv_f64(
+; CHECK-SAME: <vscale x 2 x double> [[A:%.*]]) {
+; CHECK-NEXT:    ret <2 x double> <double 0xFFF0000000000000, double 0xFFF0000000000000>
+;
+  %res = call <2 x double> @llvm.aarch64.sve.fmaxqv.v2f64.nxv2f64(<vscale x 2 x i1> zeroinitializer,
+      <vscale x 2 x double> %a)
+  ret <2 x double> %res
+}
+
+define double @fmaxv_f64(<vscale x 2 x double> %a) {
+; CHECK-LABEL: define double @fmaxv_f64(
+; CHECK-SAME: <vscale x 2 x double> [[A:%.*]]) {
+; CHECK-NEXT:    ret double 0xFFF0000000000000
+;
+  %res = call double @llvm.aarch64.sve.fmaxv.nxv2f64(<vscale x 2 x i1> zeroinitializer,
+      <vscale x 2 x double> %a)
+  ret double %res
+}
+
+define <2 x double> @fminqv_f64(<vscale x 2 x double> %a) {
+; CHECK-LABEL: define <2 x double> @fminqv_f64(
+; CHECK-SAME: <vscale x 2 x double> [[A:%.*]]) {
+; CHECK-NEXT:    ret <2 x double> <double 0x7FF0000000000000, double 0x7FF0000000000000>
+;
+  %res = call <2 x double> @llvm.aarch64.sve.fminqv.v2f64.nxv2f64(<vscale x 2 x i1> zeroinitializer,
+      <vscale x 2 x double> %a)
+  ret <2 x double> %res
+}
+
+define double @fminv_f64(<vscale x 2 x double> %a) {
+; CHECK-LABEL: define double @fminv_f64(
+; CHECK-SAME: <vscale x 2 x double> [[A:%.*]]) {
+; CHECK-NEXT:    ret double 0x7FF0000000000000
+;
+  %res = call double @llvm.aarch64.sve.fminv.nxv2f64(<vscale x 2 x i1> zeroinitializer,
+      <vscale x 2 x double> %a)
+  ret double %res
+}
+
+define i16 @smaxv_i16(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: define i16 @smaxv_i16(
+; CHECK-SAME: <vscale x 8 x i16> [[A:%.*]]) {
+; CHECK-NEXT:    ret i16 -32768
+;
+  %out = call i16 @llvm.aarch64.sve.smaxv.nxv8i16(<vscale x 8 x i1> zeroinitializer,
+      <vscale x 8 x i16> %a)
+  ret i16 %out
+}
+
+define <8 x i16> @smaxqv_i16(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: define <8 x i16> @smaxqv_i16(
+; CHECK-SAME: <vscale x 8 x i16> [[A:%.*]]) {
+; CHECK-NEXT:    ret <8 x i16> <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768>
+;
+  %res = call <8 x i16> @llvm.aarch64.sve.smaxqv.v8i16.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a);
+  ret <8 x i16> %res
+}
+
+define i16 @sminv_i16(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: define i16 @sminv_i16(
+; CHECK-SAME: <vscale x 8 x i16> [[A:%.*]]) {
+; CHECK-NEXT:    ret i16 32767
+;
+  %out = call i16 @llvm.aarch64.sve.sminv.nxv8i16(<vscale x 8 x i1> zeroinitializer,
+      <vscale x 8 x i16> %a)
+  ret i16 %out
+}
+
+define <8 x i16> @sminqv_i16(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: define <8 x i16> @sminqv_i16(
+; CHECK-SAME: <vscale x 8 x i16> [[A:%.*]]) {
+; CHECK-NEXT:    ret <8 x i16> <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767>
+;
+  %res = call <8 x i16> @llvm.aarch64.sve.sminqv.v8i16.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a);
+  ret <8 x i16> %res
+}
+
+define i16 @uminv_i16(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: define i16 @uminv_i16(
+; CHECK-SAME: <vscale x 8 x i16> [[A:%.*]]) {
+; CHECK-NEXT:    ret i16 -1
+;
+  %out = call i16 @llvm.aarch64.sve.uminv.nxv8i16(<vscale x 8 x i1> zeroinitializer,
+      <vscale x 8 x i16> %a)
+  ret i16 %out
+}
+
+define <8 x i16> @uminqv_i16(<vscale x 8 x i16> %a) {
+; CHECK-LABEL: define <8 x i16> @uminqv_i16(
+; CHECK-SAME: <vscale x 8 x i16> [[A:%.*]]) {
+; CHECK-NEXT:    ret <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
+;
+  %res = call <8 x i16> @llvm.aarch64.sve.uminqv.v8i16.nxv8i16(<vscale x 8 x i1> zeroinitializer, <vscale x 8 x i16> %a);
+  ret <8 x i16> %res
+}