1616// ===----------------------------------------------------------------------===//
1717
1818#include " AMDGPU.h"
19- #include " SILowerI1Copies.h"
20- #include " llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
2119#include " llvm/CodeGen/MachineFunctionPass.h"
22- #include " llvm/CodeGen/MachineUniformityAnalysis.h"
23- #include " llvm/InitializePasses.h"
2420
2521#define DEBUG_TYPE " amdgpu-global-isel-divergence-lowering"
2622
@@ -46,146 +42,14 @@ class AMDGPUGlobalISelDivergenceLowering : public MachineFunctionPass {
4642
4743 void getAnalysisUsage (AnalysisUsage &AU) const override {
4844 AU.setPreservesCFG ();
49- AU.addRequired <MachineDominatorTree>();
50- AU.addRequired <MachinePostDominatorTree>();
51- AU.addRequired <MachineUniformityAnalysisPass>();
5245 MachineFunctionPass::getAnalysisUsage (AU);
5346 }
5447};
5548
56- class DivergenceLoweringHelper : public PhiLoweringHelper {
57- public:
58- DivergenceLoweringHelper (MachineFunction *MF, MachineDominatorTree *DT,
59- MachinePostDominatorTree *PDT,
60- MachineUniformityInfo *MUI);
61-
62- private:
63- MachineUniformityInfo *MUI = nullptr ;
64- MachineIRBuilder B;
65- Register buildRegCopyToLaneMask (Register Reg);
66-
67- public:
68- void markAsLaneMask (Register DstReg) const override ;
69- void getCandidatesForLowering (
70- SmallVectorImpl<MachineInstr *> &Vreg1Phis) const override ;
71- void collectIncomingValuesFromPhi (
72- const MachineInstr *MI,
73- SmallVectorImpl<Incoming> &Incomings) const override ;
74- void replaceDstReg (Register NewReg, Register OldReg,
75- MachineBasicBlock *MBB) override ;
76- void buildMergeLaneMasks (MachineBasicBlock &MBB,
77- MachineBasicBlock::iterator I, const DebugLoc &DL,
78- Register DstReg, Register PrevReg,
79- Register CurReg) override ;
80- void constrainAsLaneMask (Incoming &In) override ;
81- };
82-
83- DivergenceLoweringHelper::DivergenceLoweringHelper (
84- MachineFunction *MF, MachineDominatorTree *DT,
85- MachinePostDominatorTree *PDT, MachineUniformityInfo *MUI)
86- : PhiLoweringHelper(MF, DT, PDT), MUI(MUI), B(*MF) {}
87-
88- // _(s1) -> SReg_32/64(s1)
89- void DivergenceLoweringHelper::markAsLaneMask (Register DstReg) const {
90- assert (MRI->getType (DstReg) == LLT::scalar (1 ));
91-
92- if (MRI->getRegClassOrNull (DstReg)) {
93- if (MRI->constrainRegClass (DstReg, ST->getBoolRC ()))
94- return ;
95- llvm_unreachable (" Failed to constrain register class" );
96- }
97-
98- MRI->setRegClass (DstReg, ST->getBoolRC ());
99- }
100-
101- void DivergenceLoweringHelper::getCandidatesForLowering (
102- SmallVectorImpl<MachineInstr *> &Vreg1Phis) const {
103- LLT S1 = LLT::scalar (1 );
104-
105- // Add divergent i1 phis to the list
106- for (MachineBasicBlock &MBB : *MF) {
107- for (MachineInstr &MI : MBB.phis ()) {
108- Register Dst = MI.getOperand (0 ).getReg ();
109- if (MRI->getType (Dst) == S1 && MUI->isDivergent (Dst))
110- Vreg1Phis.push_back (&MI);
111- }
112- }
113- }
114-
115- void DivergenceLoweringHelper::collectIncomingValuesFromPhi (
116- const MachineInstr *MI, SmallVectorImpl<Incoming> &Incomings) const {
117- for (unsigned i = 1 ; i < MI->getNumOperands (); i += 2 ) {
118- Incomings.emplace_back (MI->getOperand (i).getReg (),
119- MI->getOperand (i + 1 ).getMBB (), Register ());
120- }
121- }
122-
123- void DivergenceLoweringHelper::replaceDstReg (Register NewReg, Register OldReg,
124- MachineBasicBlock *MBB) {
125- BuildMI (*MBB, MBB->getFirstNonPHI (), {}, TII->get (AMDGPU::COPY), OldReg)
126- .addReg (NewReg);
127- }
128-
129- // Copy Reg to new lane mask register, insert a copy after instruction that
130- // defines Reg while skipping phis if needed.
131- Register DivergenceLoweringHelper::buildRegCopyToLaneMask (Register Reg) {
132- Register LaneMask = createLaneMaskReg (MRI, LaneMaskRegAttrs);
133- MachineInstr *Instr = MRI->getVRegDef (Reg);
134- MachineBasicBlock *MBB = Instr->getParent ();
135- B.setInsertPt (*MBB, MBB->SkipPHIsAndLabels (std::next (Instr->getIterator ())));
136- B.buildCopy (LaneMask, Reg);
137- return LaneMask;
138- }
139-
140- // bb.previous
141- // %PrevReg = ...
142- //
143- // bb.current
144- // %CurReg = ...
145- //
146- // %DstReg - not defined
147- //
148- // -> (wave32 example, new registers have sreg_32 reg class and S1 LLT)
149- //
150- // bb.previous
151- // %PrevReg = ...
152- // %PrevRegCopy:sreg_32(s1) = COPY %PrevReg
153- //
154- // bb.current
155- // %CurReg = ...
156- // %CurRegCopy:sreg_32(s1) = COPY %CurReg
157- // ...
158- // %PrevMaskedReg:sreg_32(s1) = ANDN2 %PrevRegCopy, ExecReg - active lanes 0
159- // %CurMaskedReg:sreg_32(s1) = AND %ExecReg, CurRegCopy - inactive lanes to 0
160- // %DstReg:sreg_32(s1) = OR %PrevMaskedReg, CurMaskedReg
161- //
162- // DstReg = for active lanes rewrite bit in PrevReg with bit from CurReg
163- void DivergenceLoweringHelper::buildMergeLaneMasks (
164- MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
165- Register DstReg, Register PrevReg, Register CurReg) {
166- // DstReg = (PrevReg & !EXEC) | (CurReg & EXEC)
167- // TODO: check if inputs are constants or results of a compare.
168-
169- Register PrevRegCopy = buildRegCopyToLaneMask (PrevReg);
170- Register CurRegCopy = buildRegCopyToLaneMask (CurReg);
171- Register PrevMaskedReg = createLaneMaskReg (MRI, LaneMaskRegAttrs);
172- Register CurMaskedReg = createLaneMaskReg (MRI, LaneMaskRegAttrs);
173-
174- B.setInsertPt (MBB, I);
175- B.buildInstr (AndN2Op, {PrevMaskedReg}, {PrevRegCopy, ExecReg});
176- B.buildInstr (AndOp, {CurMaskedReg}, {ExecReg, CurRegCopy});
177- B.buildInstr (OrOp, {DstReg}, {PrevMaskedReg, CurMaskedReg});
178- }
179-
180- void DivergenceLoweringHelper::constrainAsLaneMask (Incoming &In) { return ; }
181-
18249} // End anonymous namespace.
18350
18451INITIALIZE_PASS_BEGIN (AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
18552 " AMDGPU GlobalISel divergence lowering" , false , false )
186- INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
187- INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
188- INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass)
18953INITIALIZE_PASS_END(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
19054 " AMDGPU GlobalISel divergence lowering" , false , false )
19155
@@ -200,12 +64,5 @@ FunctionPass *llvm::createAMDGPUGlobalISelDivergenceLoweringPass() {
20064
20165bool AMDGPUGlobalISelDivergenceLowering::runOnMachineFunction (
20266 MachineFunction &MF) {
203- MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>();
204- MachinePostDominatorTree &PDT = getAnalysis<MachinePostDominatorTree>();
205- MachineUniformityInfo &MUI =
206- getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo ();
207-
208- DivergenceLoweringHelper Helper (&MF, &DT, &PDT, &MUI);
209-
210- return Helper.lowerPhis ();
67+ return false ;
21168}
0 commit comments