@@ -3461,6 +3461,62 @@ std::optional<int64_t> SIInstrInfo::extractSubregFromImm(int64_t Imm,
34613461 llvm_unreachable (" covered subregister switch" );
34623462}
34633463
3464+ static unsigned getNewFMAAKInst (const GCNSubtarget &ST, unsigned Opc) {
3465+ switch (Opc) {
3466+ case AMDGPU::V_MAC_F16_e32:
3467+ case AMDGPU::V_MAC_F16_e64:
3468+ case AMDGPU::V_MAD_F16_e64:
3469+ return AMDGPU::V_MADAK_F16;
3470+ case AMDGPU::V_MAC_F32_e32:
3471+ case AMDGPU::V_MAC_F32_e64:
3472+ case AMDGPU::V_MAD_F32_e64:
3473+ return AMDGPU::V_MADAK_F32;
3474+ case AMDGPU::V_FMAC_F32_e32:
3475+ case AMDGPU::V_FMAC_F32_e64:
3476+ case AMDGPU::V_FMA_F32_e64:
3477+ return AMDGPU::V_FMAAK_F32;
3478+ case AMDGPU::V_FMAC_F16_e32:
3479+ case AMDGPU::V_FMAC_F16_e64:
3480+ case AMDGPU::V_FMAC_F16_t16_e64:
3481+ case AMDGPU::V_FMAC_F16_fake16_e64:
3482+ case AMDGPU::V_FMA_F16_e64:
3483+ return ST.hasTrue16BitInsts () ? ST.useRealTrue16Insts ()
3484+ ? AMDGPU::V_FMAAK_F16_t16
3485+ : AMDGPU::V_FMAAK_F16_fake16
3486+ : AMDGPU::V_FMAAK_F16;
3487+ default :
3488+ llvm_unreachable (" invalid instruction" );
3489+ }
3490+ }
3491+
3492+ static unsigned getNewFMAMKInst (const GCNSubtarget &ST, unsigned Opc) {
3493+ switch (Opc) {
3494+ case AMDGPU::V_MAC_F16_e32:
3495+ case AMDGPU::V_MAC_F16_e64:
3496+ case AMDGPU::V_MAD_F16_e64:
3497+ return AMDGPU::V_MADMK_F16;
3498+ case AMDGPU::V_MAC_F32_e32:
3499+ case AMDGPU::V_MAC_F32_e64:
3500+ case AMDGPU::V_MAD_F32_e64:
3501+ return AMDGPU::V_MADMK_F32;
3502+ case AMDGPU::V_FMAC_F32_e32:
3503+ case AMDGPU::V_FMAC_F32_e64:
3504+ case AMDGPU::V_FMA_F32_e64:
3505+ return AMDGPU::V_FMAMK_F32;
3506+ case AMDGPU::V_FMAC_F16_e32:
3507+ case AMDGPU::V_FMAC_F16_e64:
3508+ case AMDGPU::V_FMAC_F16_t16_e64:
3509+ case AMDGPU::V_FMAC_F16_fake16_e64:
3510+ case AMDGPU::V_FMA_F16_e64:
3511+ return ST.hasTrue16BitInsts () ? ST.useRealTrue16Insts ()
3512+ ? AMDGPU::V_FMAMK_F16_t16
3513+ : AMDGPU::V_FMAMK_F16_fake16
3514+ : AMDGPU::V_FMAMK_F16;
3515+ default :
3516+ llvm_unreachable (" invalid instruction" );
3517+ }
3518+ }
3519+
34643520bool SIInstrInfo::foldImmediate (MachineInstr &UseMI, MachineInstr &DefMI,
34653521 Register Reg, MachineRegisterInfo *MRI) const {
34663522 if (!MRI->hasOneNonDBGUse (Reg))
@@ -3588,13 +3644,7 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
35883644 !isInlineConstant (Def->getOperand (1 )))
35893645 return false ;
35903646
3591- unsigned NewOpc =
3592- IsFMA ? (IsF32 ? AMDGPU::V_FMAMK_F32
3593- : ST.hasTrue16BitInsts () ? ST.useRealTrue16Insts ()
3594- ? AMDGPU::V_FMAMK_F16_t16
3595- : AMDGPU::V_FMAMK_F16_fake16
3596- : AMDGPU::V_FMAMK_F16)
3597- : (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16);
3647+ unsigned NewOpc = getNewFMAMKInst (ST, Opc);
35983648 if (pseudoToMCOpcode (NewOpc) == -1 )
35993649 return false ;
36003650
@@ -3671,13 +3721,7 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
36713721 }
36723722 }
36733723
3674- unsigned NewOpc =
3675- IsFMA ? (IsF32 ? AMDGPU::V_FMAAK_F32
3676- : ST.hasTrue16BitInsts () ? ST.useRealTrue16Insts ()
3677- ? AMDGPU::V_FMAAK_F16_t16
3678- : AMDGPU::V_FMAAK_F16_fake16
3679- : AMDGPU::V_FMAAK_F16)
3680- : (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16);
3724+ unsigned NewOpc = getNewFMAAKInst (ST, Opc);
36813725 if (pseudoToMCOpcode (NewOpc) == -1 )
36823726 return false ;
36833727
@@ -4067,14 +4111,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
40674111
40684112 int64_t Imm;
40694113 if (!Src0Literal && getFoldableImm (Src2, Imm, &DefMI)) {
4070- unsigned NewOpc =
4071- IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts ()
4072- ? ST.useRealTrue16Insts ()
4073- ? AMDGPU::V_FMAAK_F16_t16
4074- : AMDGPU::V_FMAAK_F16_fake16
4075- : AMDGPU::V_FMAAK_F16)
4076- : AMDGPU::V_FMAAK_F32)
4077- : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);
4114+ unsigned NewOpc = getNewFMAAKInst (ST, Opc);
40784115 if (pseudoToMCOpcode (NewOpc) != -1 ) {
40794116 MIB = BuildMI (MBB, MI, MI.getDebugLoc (), get (NewOpc))
40804117 .add (*Dst)
@@ -4089,14 +4126,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
40894126 return MIB;
40904127 }
40914128 }
4092- unsigned NewOpc = IsFMA
4093- ? (IsF16 ? (ST.hasTrue16BitInsts ()
4094- ? ST.useRealTrue16Insts ()
4095- ? AMDGPU::V_FMAMK_F16_t16
4096- : AMDGPU::V_FMAMK_F16_fake16
4097- : AMDGPU::V_FMAMK_F16)
4098- : AMDGPU::V_FMAMK_F32)
4099- : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
4129+ unsigned NewOpc = getNewFMAMKInst (ST, Opc);
41004130 if (!Src0Literal && getFoldableImm (Src1, Imm, &DefMI)) {
41014131 if (pseudoToMCOpcode (NewOpc) != -1 ) {
41024132 MIB = BuildMI (MBB, MI, MI.getDebugLoc (), get (NewOpc))
0 commit comments