@@ -3544,7 +3544,6 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
35443544      Opc == AMDGPU::V_MAD_F16_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
35453545      Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
35463546      Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3547-       Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
35483547      Opc == AMDGPU::V_FMAC_F16_fake16_e64) {
35493548    //  Don't fold if we are using source or output modifiers. The new VOP2
35503549    //  instructions don't have them.
@@ -3565,7 +3564,6 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
35653564    bool  IsFMA =
35663565        Opc == AMDGPU::V_FMA_F32_e64 || Opc == AMDGPU::V_FMAC_F32_e64 ||
35673566        Opc == AMDGPU::V_FMA_F16_e64 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3568-         Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
35693567        Opc == AMDGPU::V_FMAC_F16_fake16_e64;
35703568    MachineOperand *Src1 = getNamedOperand (UseMI, AMDGPU::OpName::src1);
35713569    MachineOperand *Src2 = getNamedOperand (UseMI, AMDGPU::OpName::src2);
@@ -3599,19 +3597,16 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
35993597
36003598      unsigned  NewOpc =
36013599          IsFMA ? (IsF32                    ? AMDGPU::V_FMAMK_F32
3602-                    : ST.hasTrue16BitInsts () ? ST.useRealTrue16Insts ()
3603-                                                   ? AMDGPU::V_FMAMK_F16_t16
3604-                                                   : AMDGPU::V_FMAMK_F16_fake16
3600+                    : ST.hasTrue16BitInsts () ? AMDGPU::V_FMAMK_F16_fake16
36053601                                            : AMDGPU::V_FMAMK_F16)
36063602                : (IsF32 ? AMDGPU::V_MADMK_F32 : AMDGPU::V_MADMK_F16);
36073603      if  (pseudoToMCOpcode (NewOpc) == -1 )
36083604        return  false ;
36093605
3610-       //  V_FMAMK_F16_t16 takes VGPR_16_Lo128 operands while V_FMAMK_F16_fake16
3611-       //  takes VGPR_32_Lo128 operands, so the rewrite would also require
3612-       //  restricting their register classes. For now just bail out.
3613-       if  (NewOpc == AMDGPU::V_FMAMK_F16_t16 ||
3614-           NewOpc == AMDGPU::V_FMAMK_F16_fake16)
3606+       //  V_FMAMK_F16_fake16 takes VGPR_32_Lo128 operands, so the rewrite
3607+       //  would also require restricting their register classes. For now
3608+       //  just bail out.
3609+       if  (NewOpc == AMDGPU::V_FMAMK_F16_fake16)
36153610        return  false ;
36163611
36173612      const  int64_t  Imm = getImmFor (RegSrc == Src1 ? *Src0 : *Src1);
@@ -3626,7 +3621,7 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
36263621      Src0->setIsKill (RegSrc->isKill ());
36273622
36283623      if  (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
3629-           Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 || 
3624+           Opc == AMDGPU::V_FMAC_F32_e64 ||
36303625          Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
36313626        UseMI.untieRegOperand (
36323627            AMDGPU::getNamedOperandIdx (Opc, AMDGPU::OpName::src2));
@@ -3681,26 +3676,23 @@ bool SIInstrInfo::foldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
36813676
36823677      unsigned  NewOpc =
36833678          IsFMA ? (IsF32                    ? AMDGPU::V_FMAAK_F32
3684-                    : ST.hasTrue16BitInsts () ? ST.useRealTrue16Insts ()
3685-                                                   ? AMDGPU::V_FMAAK_F16_t16
3686-                                                   : AMDGPU::V_FMAAK_F16_fake16
3679+                    : ST.hasTrue16BitInsts () ? AMDGPU::V_FMAAK_F16_fake16
36873680                                            : AMDGPU::V_FMAAK_F16)
36883681                : (IsF32 ? AMDGPU::V_MADAK_F32 : AMDGPU::V_MADAK_F16);
36893682      if  (pseudoToMCOpcode (NewOpc) == -1 )
36903683        return  false ;
36913684
3692-       //  V_FMAAK_F16_t16 takes VGPR_16_Lo128 operands while V_FMAAK_F16_fake16
3693-       //  takes VGPR_32_Lo128 operands, so the rewrite would also require
3694-       //  restricting their register classes. For now just bail out.
3695-       if  (NewOpc == AMDGPU::V_FMAAK_F16_t16 ||
3696-           NewOpc == AMDGPU::V_FMAAK_F16_fake16)
3685+       //  V_FMAAK_F16_fake16 takes VGPR_32_Lo128 operands, so the rewrite
3686+       //  would also require restricting their register classes. For now
3687+       //  just bail out.
3688+       if  (NewOpc == AMDGPU::V_FMAAK_F16_fake16)
36973689        return  false ;
36983690
36993691      //  FIXME: This would be a lot easier if we could return a new instruction
37003692      //  instead of having to modify in place.
37013693
37023694      if  (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
3703-           Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 || 
3695+           Opc == AMDGPU::V_FMAC_F32_e64 ||
37043696          Opc == AMDGPU::V_FMAC_F16_fake16_e64 || Opc == AMDGPU::V_FMAC_F16_e64)
37053697        UseMI.untieRegOperand (
37063698            AMDGPU::getNamedOperandIdx (Opc, AMDGPU::OpName::src2));
@@ -3887,11 +3879,8 @@ static unsigned getNewFMAInst(const GCNSubtarget &ST, unsigned Opc) {
38873879    return  AMDGPU::V_FMA_LEGACY_F32_e64;
38883880  case  AMDGPU::V_FMAC_F16_e32:
38893881  case  AMDGPU::V_FMAC_F16_e64:
3890-   case  AMDGPU::V_FMAC_F16_t16_e64:
38913882  case  AMDGPU::V_FMAC_F16_fake16_e64:
3892-     return  ST.hasTrue16BitInsts () ? ST.useRealTrue16Insts ()
3893-                                         ? AMDGPU::V_FMA_F16_gfx9_t16_e64
3894-                                         : AMDGPU::V_FMA_F16_gfx9_fake16_e64
3883+     return  ST.hasTrue16BitInsts () ? AMDGPU::V_FMA_F16_gfx9_fake16_e64
38953884                                  : AMDGPU::V_FMA_F16_gfx9_e64;
38963885  case  AMDGPU::V_FMAC_F32_e32:
38973886  case  AMDGPU::V_FMAC_F32_e64:
@@ -3957,22 +3946,19 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
39573946    return  MIB;
39583947  }
39593948
3960-   assert (Opc != AMDGPU::V_FMAC_F16_t16_e32 &&
3961-          Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
3962-          " V_FMAC_F16_t16/fake16_e32 is not supported and not expected to be " 
3963-          " present " 
3964-          " pre-RA" 
3949+   assert (
3950+       Opc != AMDGPU::V_FMAC_F16_fake16_e32 &&
3951+       " V_FMAC_F16_fake16_e32 is not supported and not expected to be present " 
3952+       " pre-RA" 
39653953
39663954  //  Handle MAC/FMAC.
39673955  bool  IsF16 = Opc == AMDGPU::V_MAC_F16_e32 || Opc == AMDGPU::V_MAC_F16_e64 ||
39683956               Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3969-                Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
39703957               Opc == AMDGPU::V_FMAC_F16_fake16_e64;
39713958  bool  IsFMA = Opc == AMDGPU::V_FMAC_F32_e32 || Opc == AMDGPU::V_FMAC_F32_e64 ||
39723959               Opc == AMDGPU::V_FMAC_LEGACY_F32_e32 ||
39733960               Opc == AMDGPU::V_FMAC_LEGACY_F32_e64 ||
39743961               Opc == AMDGPU::V_FMAC_F16_e32 || Opc == AMDGPU::V_FMAC_F16_e64 ||
3975-                Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
39763962               Opc == AMDGPU::V_FMAC_F16_fake16_e64 ||
39773963               Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
39783964  bool  IsF64 = Opc == AMDGPU::V_FMAC_F64_e32 || Opc == AMDGPU::V_FMAC_F64_e64;
@@ -3987,7 +3973,6 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
39873973    return  nullptr ;
39883974  case  AMDGPU::V_MAC_F16_e64:
39893975  case  AMDGPU::V_FMAC_F16_e64:
3990-   case  AMDGPU::V_FMAC_F16_t16_e64:
39913976  case  AMDGPU::V_FMAC_F16_fake16_e64:
39923977  case  AMDGPU::V_MAC_F32_e64:
39933978  case  AMDGPU::V_MAC_LEGACY_F32_e64:
@@ -4073,11 +4058,8 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
40734058    int64_t  Imm;
40744059    if  (!Src0Literal && getFoldableImm (Src2, Imm, &DefMI)) {
40754060      unsigned  NewOpc =
4076-           IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts ()
4077-                                 ? ST.useRealTrue16Insts ()
4078-                                       ? AMDGPU::V_FMAAK_F16_t16
4079-                                       : AMDGPU::V_FMAAK_F16_fake16
4080-                                 : AMDGPU::V_FMAAK_F16)
4061+           IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts () ? AMDGPU::V_FMAAK_F16_fake16
4062+                                                    : AMDGPU::V_FMAAK_F16)
40814063                         : AMDGPU::V_FMAAK_F32)
40824064                : (IsF16 ? AMDGPU::V_MADAK_F16 : AMDGPU::V_MADAK_F32);
40834065      if  (pseudoToMCOpcode (NewOpc) != -1 ) {
@@ -4094,14 +4076,11 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
40944076        return  MIB;
40954077      }
40964078    }
4097-     unsigned  NewOpc = IsFMA
4098-                           ? (IsF16 ? (ST.hasTrue16BitInsts ()
4099-                                           ? ST.useRealTrue16Insts ()
4100-                                                 ? AMDGPU::V_FMAMK_F16_t16
4101-                                                 : AMDGPU::V_FMAMK_F16_fake16
4102-                                           : AMDGPU::V_FMAMK_F16)
4103-                                    : AMDGPU::V_FMAMK_F32)
4104-                           : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
4079+     unsigned  NewOpc =
4080+         IsFMA ? (IsF16 ? (ST.hasTrue16BitInsts () ? AMDGPU::V_FMAMK_F16_fake16
4081+                                                  : AMDGPU::V_FMAMK_F16)
4082+                        : AMDGPU::V_FMAMK_F32)
4083+               : (IsF16 ? AMDGPU::V_MADMK_F16 : AMDGPU::V_MADMK_F32);
41054084    if  (!Src0Literal && getFoldableImm (Src1, Imm, &DefMI)) {
41064085      if  (pseudoToMCOpcode (NewOpc) != -1 ) {
41074086        MIB = BuildMI (MBB, MI, MI.getDebugLoc (), get (NewOpc))
@@ -4547,7 +4526,6 @@ bool SIInstrInfo::canShrink(const MachineInstr &MI,
45474526      case  AMDGPU::V_MAC_F32_e64:
45484527      case  AMDGPU::V_MAC_LEGACY_F32_e64:
45494528      case  AMDGPU::V_FMAC_F16_e64:
4550-       case  AMDGPU::V_FMAC_F16_t16_e64:
45514529      case  AMDGPU::V_FMAC_F16_fake16_e64:
45524530      case  AMDGPU::V_FMAC_F32_e64:
45534531      case  AMDGPU::V_FMAC_F64_e64:
@@ -5604,9 +5582,7 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
56045582  case  AMDGPU::S_MUL_F16: return  AMDGPU::V_MUL_F16_fake16_e64;
56055583  case  AMDGPU::S_CVT_PK_RTZ_F16_F32: return  AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
56065584  case  AMDGPU::S_FMAC_F32: return  AMDGPU::V_FMAC_F32_e64;
5607-   case  AMDGPU::S_FMAC_F16:
5608-     return  ST.useRealTrue16Insts () ? AMDGPU::V_FMAC_F16_t16_e64
5609-                                    : AMDGPU::V_FMAC_F16_fake16_e64;
5585+   case  AMDGPU::S_FMAC_F16: return  AMDGPU::V_FMAC_F16_fake16_e64;
56105586  case  AMDGPU::S_FMAMK_F32: return  AMDGPU::V_FMAMK_F32;
56115587  case  AMDGPU::S_FMAAK_F32: return  AMDGPU::V_FMAAK_F32;
56125588  case  AMDGPU::S_CMP_LT_F32: return  AMDGPU::V_CMP_LT_F32_e64;
0 commit comments