From d1f24502852acd283d6acb1f6acea434c45fd908 Mon Sep 17 00:00:00 2001 From: Ronen Ulanovsky Date: Thu, 13 Jul 2023 19:57:30 +0300 Subject: [PATCH 1/3] [Xtensa] Fix FP mul-sub fusion MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `performMADD_MSUBCombine` treats `x * y ± z` and `z ± x * y` interchangeably, which is wrong for `msub.s`: it only computes the latter form. Add a check that the operand orientation is correct and, if it is not, negate the result. --- llvm/lib/Target/Xtensa/XtensaISelLowering.cpp | 27 +++++++++++-------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp index b51270f41e0bb..15a3609c99d4c 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp @@ -512,32 +512,37 @@ void XtensaTargetLowering::LowerAsmOperandForConstraint( static SDValue performMADD_MSUBCombine(SDNode *ROOTNode, SelectionDAG &CurDAG, const XtensaSubtarget &Subtarget) { - if (ROOTNode->getOperand(0).getValueType() != MVT::f32) - return SDValue(); + SDValue LHS = ROOTNode->getOperand(0); + SDValue RHS = ROOTNode->getOperand(1); - if (ROOTNode->getOperand(0).getOpcode() != ISD::FMUL && - ROOTNode->getOperand(1).getOpcode() != ISD::FMUL) + if (LHS.getValueType() != MVT::f32) return SDValue(); - SDValue Mult = ROOTNode->getOperand(0).getOpcode() == ISD::FMUL - ? ROOTNode->getOperand(0) - : ROOTNode->getOperand(1); + bool IsAdd = ROOTNode->getOpcode() == ISD::FADD; + + SDValue Mult = LHS, AddOperand = RHS; + bool NegRes = !IsAdd; - SDValue AddOperand = ROOTNode->getOperand(0).getOpcode() == ISD::FMUL - ? 
ROOTNode->getOperand(1) - : ROOTNode->getOperand(0); + if (LHS.getOpcode() != ISD::FMUL && RHS.getOpcode() != ISD::FMUL) + return SDValue(); + else if (RHS.getOpcode() == ISD::FMUL) { + Mult = RHS; + AddOperand = LHS; + NegRes = false; + } if (!Mult.hasOneUse()) return SDValue(); SDLoc DL(ROOTNode); - bool IsAdd = ROOTNode->getOpcode() == ISD::FADD; unsigned Opcode = IsAdd ? XtensaISD::MADD : XtensaISD::MSUB; SDValue MAddOps[3] = {AddOperand, Mult->getOperand(0), Mult->getOperand(1)}; EVT VTs[3] = {MVT::f32, MVT::f32, MVT::f32}; SDValue MAdd = CurDAG.getNode(Opcode, DL, VTs, MAddOps); + if (NegRes) + return CurDAG.getNode(ISD::FNEG, DL, MVT::f32, MAdd); return MAdd; } From fca9a24b8e482b0d06a5a56df0c01cfa09a165ca Mon Sep 17 00:00:00 2001 From: Ronen Ulanovsky Date: Sun, 16 Jul 2023 01:22:50 +0300 Subject: [PATCH 2/3] [Xtensa] Add more valid FMA patterns and tests 1. Prefer `neg.s` to `l32r ar, 0x80000000; xor ar, as, ar` when `wfr/rfr` are used anyway 2. Add patterns for fma/madd/msub to automatically generate msub when it's the better choice 3. XtensaISelLowering.cpp: Rely on LLVM to lower FMA to madd.s/msub.s instead of hardcoding 4. 
Add float-fma.ll with various fused multiply add/subtract permutations --- llvm/lib/Target/Xtensa/XtensaISelLowering.cpp | 48 ++++-- llvm/lib/Target/Xtensa/XtensaISelLowering.h | 2 + llvm/lib/Target/Xtensa/XtensaInstrInfo.td | 9 +- llvm/test/CodeGen/Xtensa/float-fma.ll | 147 ++++++++++++++++++ 4 files changed, 187 insertions(+), 19 deletions(-) create mode 100644 llvm/test/CodeGen/Xtensa/float-fma.ll diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp index 15a3609c99d4c..db3eb0dddf138 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.cpp @@ -374,6 +374,20 @@ MVT XtensaTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context, return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT); } +bool XtensaTargetLowering::isFNegFree(EVT VT) const { + if (!VT.isSimple()) + return false; + + switch (VT.getSimpleVT().SimpleTy) { + case MVT::f32: + return Subtarget.hasSingleFloat(); + default: + break; + } + + return false; +} + bool XtensaTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const { if (!VT.isSimple()) @@ -515,35 +529,35 @@ static SDValue performMADD_MSUBCombine(SDNode *ROOTNode, SelectionDAG &CurDAG, SDValue LHS = ROOTNode->getOperand(0); SDValue RHS = ROOTNode->getOperand(1); - if (LHS.getValueType() != MVT::f32) + if (LHS.getValueType() != MVT::f32 || (LHS.getOpcode() != ISD::FMUL && RHS.getOpcode() != ISD::FMUL)) return SDValue(); + SDLoc DL(ROOTNode); bool IsAdd = ROOTNode->getOpcode() == ISD::FADD; - SDValue Mult = LHS, AddOperand = RHS; - bool NegRes = !IsAdd; + SDValue Mult, AddOperand; + bool Inverted; - if (LHS.getOpcode() != ISD::FMUL && RHS.getOpcode() != ISD::FMUL) - return SDValue(); - else if (RHS.getOpcode() == ISD::FMUL) { - Mult = RHS; - AddOperand = LHS; - NegRes = false; - } + if (LHS.getOpcode() == ISD::FMUL) + Mult = LHS, AddOperand = RHS, Inverted = false; + else + Mult = RHS, 
AddOperand = LHS, Inverted = true; if (!Mult.hasOneUse()) return SDValue(); - SDLoc DL(ROOTNode); + SDValue MultOperand0 = Mult->getOperand(0), MultOperand1 = Mult->getOperand(1); + + if (!IsAdd) + if (Inverted) + MultOperand0 = CurDAG.getNode(ISD::FNEG, DL, MVT::f32, MultOperand0); + else + AddOperand = CurDAG.getNode(ISD::FNEG, DL, MVT::f32, AddOperand); - unsigned Opcode = IsAdd ? XtensaISD::MADD : XtensaISD::MSUB; - SDValue MAddOps[3] = {AddOperand, Mult->getOperand(0), Mult->getOperand(1)}; + SDValue FMAOps[3] = {MultOperand0, MultOperand1, AddOperand}; EVT VTs[3] = {MVT::f32, MVT::f32, MVT::f32}; - SDValue MAdd = CurDAG.getNode(Opcode, DL, VTs, MAddOps); - if (NegRes) - return CurDAG.getNode(ISD::FNEG, DL, MVT::f32, MAdd); - return MAdd; + return CurDAG.getNode(ISD::FMA, DL, VTs, FMAOps); } static SDValue performSUBCombine(SDNode *N, SelectionDAG &DAG, diff --git a/llvm/lib/Target/Xtensa/XtensaISelLowering.h b/llvm/lib/Target/Xtensa/XtensaISelLowering.h index eaa8a0776346d..762eb475ed9b0 100644 --- a/llvm/lib/Target/Xtensa/XtensaISelLowering.h +++ b/llvm/lib/Target/Xtensa/XtensaISelLowering.h @@ -120,6 +120,8 @@ class XtensaTargetLowering : public TargetLowering { bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override; + bool isFNegFree(EVT VT) const override; + /// If a physical register, this returns the register that receives the /// exception address on entry to an EH pad. 
Register diff --git a/llvm/lib/Target/Xtensa/XtensaInstrInfo.td b/llvm/lib/Target/Xtensa/XtensaInstrInfo.td index 29bce03c30367..ba63fdaa5b7df 100644 --- a/llvm/lib/Target/Xtensa/XtensaInstrInfo.td +++ b/llvm/lib/Target/Xtensa/XtensaInstrInfo.td @@ -1112,15 +1112,16 @@ def FLOOR_S : RRR_Inst<0x00, 0x0A, 0x0A, (outs AR:$r), (ins FPR:$s, uimm4:$imm), let t = imm; } -def MADDN_S : RRR_Inst<0x00, 0x0A, 0x06, (outs FPR:$r), (ins FPR:$s, FPR:$t), +def MADDN_S : RRR_Inst<0x00, 0x0A, 0x06, (outs FPR:$r), (ins FPR:$a, FPR:$s, FPR:$t), "maddn.s\t$r, $s, $t", []>, Requires<[HasSingleFloat]> { let isCommutable = 0; + let Constraints = "$r = $a"; } // FP multipy-add def MADD_S : RRR_Inst<0x00, 0x0A, 0x04, (outs FPR:$r), (ins FPR:$a, FPR:$s, FPR:$t), "madd.s\t$r, $s, $t", - [(set FPR:$r, (Xtensa_madd FPR:$a, FPR:$s, FPR:$t))]>, + [(set FPR:$r, (Xtensa_madd FPR:$a, FPR:$s, FPR:$t))]>, Requires<[HasSingleFloat]> { let isCommutable = 0; let isReMaterializable = 0; @@ -1175,6 +1176,10 @@ def MSUB_S : RRR_Inst<0x00, 0x0A, 0x05, (outs FPR:$r), (ins FPR:$a, FPR:$s, FPR: let Constraints = "$r = $a"; } +// fmsub: -r1 * r2 + r3 +def : Pat<(fma (fneg FPR:$r1), FPR:$r2, FPR:$r3), + (MSUB_S $r3, $r1, $r2)>; + def NEXP01_S : RRR_Inst<0x00, 0x0A, 0x0F, (outs FPR:$r), (ins FPR:$s), "nexp01.s\t$r, $s", []>, Requires<[HasSingleFloat]> { let t = 0x0B; diff --git a/llvm/test/CodeGen/Xtensa/float-fma.ll b/llvm/test/CodeGen/Xtensa/float-fma.ll new file mode 100644 index 0000000000000..73fc196f4a33c --- /dev/null +++ b/llvm/test/CodeGen/Xtensa/float-fma.ll @@ -0,0 +1,147 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=xtensa -mcpu=esp32 -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=XTENSA %s + +define float @fmadd_s(float %a, float %b, float %c) nounwind { +; XTENSA-LABEL: fmadd_s: +; XTENSA: # %bb.0: +; XTENSA-NEXT: entry a1, 32 +; XTENSA-NEXT: wfr f8, a3 +; XTENSA-NEXT: wfr f9, a2 +; XTENSA-NEXT: wfr f10, a4 +; XTENSA-NEXT: 
madd.s f10, f9, f8 +; XTENSA-NEXT: rfr a2, f10 +; XTENSA-NEXT: retw.n + %mul = fmul float %a, %b + %add = fadd float %mul, %c + ret float %add +} + +define float @fmsub_s(float %a, float %b, float %c) nounwind { +; XTENSA-LABEL: fmsub_s: +; XTENSA: # %bb.0: +; XTENSA-NEXT: entry a1, 32 +; XTENSA-NEXT: wfr f8, a3 +; XTENSA-NEXT: wfr f9, a2 +; XTENSA-NEXT: wfr f10, a4 +; XTENSA-NEXT: neg.s f10, f10 +; XTENSA-NEXT: madd.s f10, f9, f8 +; XTENSA-NEXT: rfr a2, f10 +; XTENSA-NEXT: retw.n + %mul = fmul float %a, %b + %sub = fsub float %mul, %c + ret float %sub +} + +define float @fnmadd_s(float %a, float %b, float %c) nounwind { +; XTENSA-LABEL: fnmadd_s: +; XTENSA: # %bb.0: +; XTENSA-NEXT: entry a1, 32 +; XTENSA-NEXT: wfr f8, a3 +; XTENSA-NEXT: wfr f9, a2 +; XTENSA-NEXT: wfr f10, a4 +; XTENSA-NEXT: madd.s f10, f9, f8 +; XTENSA-NEXT: neg.s f8, f10 +; XTENSA-NEXT: rfr a2, f8 +; XTENSA-NEXT: retw.n + %mul = fmul float %a, %b + %add = fadd float %mul, %c + %negadd = fneg float %add + ret float %negadd +} + + +define float @fnmsub_s(float %a, float %b, float %c) nounwind { +; XTENSA-LABEL: fnmsub_s: +; XTENSA: # %bb.0: +; XTENSA-NEXT: entry a1, 32 +; XTENSA-NEXT: wfr f8, a3 +; XTENSA-NEXT: wfr f9, a2 +; XTENSA-NEXT: wfr f10, a4 +; XTENSA-NEXT: msub.s f10, f9, f8 +; XTENSA-NEXT: rfr a2, f10 +; XTENSA-NEXT: retw.n + %nega = fneg float %a + %mul = fmul float %nega, %b + %add = fadd float %mul, %c + ret float %add +} + +declare float @llvm.fma.f32(float, float, float) + +define float @fmadd_s_intrinsics(float %a, float %b, float %c) nounwind { +; XTENSA-LABEL: fmadd_s_intrinsics: +; XTENSA: # %bb.0: +; XTENSA-NEXT: entry a1, 32 +; XTENSA-NEXT: wfr f8, a3 +; XTENSA-NEXT: wfr f9, a2 +; XTENSA-NEXT: wfr f10, a4 +; XTENSA-NEXT: madd.s f10, f9, f8 +; XTENSA-NEXT: rfr a2, f10 +; XTENSA-NEXT: retw.n + %fma = call float @llvm.fma.f32(float %a, float %b, float %c) + ret float %fma +} + +define float @fmsub_s_intrinsics(float %a, float %b, float %c) nounwind { +; XTENSA-LABEL: 
fmsub_s_intrinsics: +; XTENSA: # %bb.0: +; XTENSA-NEXT: entry a1, 32 +; XTENSA-NEXT: wfr f8, a3 +; XTENSA-NEXT: wfr f9, a2 +; XTENSA-NEXT: wfr f10, a4 +; XTENSA-NEXT: neg.s f10, f10 +; XTENSA-NEXT: madd.s f10, f9, f8 +; XTENSA-NEXT: rfr a2, f10 +; XTENSA-NEXT: retw.n + %negc = fneg float %c + %fma = call float @llvm.fma.f32(float %a, float %b, float %negc) + ret float %fma +} + +define float @fnmadd_s_intrinsics(float %a, float %b, float %c) nounwind { +; XTENSA-LABEL: fnmadd_s_intrinsics: +; XTENSA: # %bb.0: +; XTENSA-NEXT: entry a1, 32 +; XTENSA-NEXT: wfr f8, a3 +; XTENSA-NEXT: wfr f9, a2 +; XTENSA-NEXT: wfr f10, a4 +; XTENSA-NEXT: madd.s f10, f9, f8 +; XTENSA-NEXT: neg.s f8, f10 +; XTENSA-NEXT: rfr a2, f8 +; XTENSA-NEXT: retw.n + %fma = call float @llvm.fma.f32(float %a, float %b, float %c) + %neg = fneg float %fma + ret float %neg +} + +define float @fnmsub_s_intrinsics(float %a, float %b, float %c) nounwind { +; XTENSA-LABEL: fnmsub_s_intrinsics: +; XTENSA: # %bb.0: +; XTENSA-NEXT: entry a1, 32 +; XTENSA-NEXT: wfr f8, a3 +; XTENSA-NEXT: wfr f9, a2 +; XTENSA-NEXT: wfr f10, a4 +; XTENSA-NEXT: msub.s f10, f9, f8 +; XTENSA-NEXT: rfr a2, f10 +; XTENSA-NEXT: retw.n + %nega = fneg float %a + %fma = call float @llvm.fma.f32(float %nega, float %b, float %c) + ret float %fma +} + +define float @fnmsub_s_swap_intrinsics(float %a, float %b, float %c) nounwind { +; XTENSA-LABEL: fnmsub_s_swap_intrinsics: +; XTENSA: # %bb.0: +; XTENSA-NEXT: entry a1, 32 +; XTENSA-NEXT: wfr f8, a3 +; XTENSA-NEXT: wfr f9, a2 +; XTENSA-NEXT: wfr f10, a4 +; XTENSA-NEXT: neg.s f10, f10 +; XTENSA-NEXT: madd.s f10, f9, f8 +; XTENSA-NEXT: rfr a2, f10 +; XTENSA-NEXT: retw.n + %negc = fneg float %c + %fma = call float @llvm.fma.f32(float %a, float %b, float %negc) + ret float %fma +} From 8268f03e95a23264f6893cb0502d0debdc6fe6f0 Mon Sep 17 00:00:00 2001 From: Ronen Ulanovsky Date: Sun, 20 Aug 2023 03:02:02 +0300 Subject: [PATCH 3/3] [Xtensa] fix `fnmsub_s_swap_intrinsics` test --- 
llvm/test/CodeGen/Xtensa/float-fma.ll | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/llvm/test/CodeGen/Xtensa/float-fma.ll b/llvm/test/CodeGen/Xtensa/float-fma.ll index 73fc196f4a33c..484b2705a18c6 100644 --- a/llvm/test/CodeGen/Xtensa/float-fma.ll +++ b/llvm/test/CodeGen/Xtensa/float-fma.ll @@ -134,14 +134,13 @@ define float @fnmsub_s_swap_intrinsics(float %a, float %b, float %c) nounwind { ; XTENSA-LABEL: fnmsub_s_swap_intrinsics: ; XTENSA: # %bb.0: ; XTENSA-NEXT: entry a1, 32 -; XTENSA-NEXT: wfr f8, a3 -; XTENSA-NEXT: wfr f9, a2 +; XTENSA-NEXT: wfr f8, a2 +; XTENSA-NEXT: wfr f9, a3 ; XTENSA-NEXT: wfr f10, a4 -; XTENSA-NEXT: neg.s f10, f10 -; XTENSA-NEXT: madd.s f10, f9, f8 +; XTENSA-NEXT: msub.s f10, f9, f8 ; XTENSA-NEXT: rfr a2, f10 ; XTENSA-NEXT: retw.n - %negc = fneg float %c - %fma = call float @llvm.fma.f32(float %a, float %b, float %negc) + %negb = fneg float %b + %fma = call float @llvm.fma.f32(float %a, float %negb, float %c) ret float %fma }