@@ -1773,8 +1773,35 @@ AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder(
17731773}
17741774
17751775void AMDGPUCodeGenPassBuilder::addCodeGenPrepare (AddIRPass &addPass) const {
1776+ // AMDGPUAnnotateKernelFeaturesPass is missing here, but it will hopefully be
1777+ // deleted soon.
1778+
1779+ if (EnableLowerKernelArguments)
1780+ addPass (AMDGPULowerKernelArgumentsPass (TM));
1781+
1782+ // This lowering has been placed after codegenprepare to take advantage of
1783+ // address mode matching (which is why it isn't put with the LDS lowerings).
1784+ // It could be placed anywhere before uniformity annotations (an analysis
1785+ // that it changes by splitting up fat pointers into their components)
1786+ // but has been put before switch lowering and CFG flattening so that those
1787+ // passes can run on the more optimized control flow this pass creates in
1788+ // many cases.
1789+ //
1790+ // FIXME: This should ideally be put after the LoadStoreVectorizer.
1791+ // However, due to some annoying facts about ResourceUsageAnalysis,
1792+ // (especially as exercised in the resource-usage-dead-function test),
1793+ // we need all the function passes from codegenprepare all the way through
1794+ // said resource usage analysis to run on the call graph produced
1795+ // before codegenprepare runs (because codegenprepare will knock some
1796+ // nodes out of the graph, which leads to function-level passes not
1797+ // being run on them, which causes crashes in the resource usage analysis).
1798+ addPass (AMDGPULowerBufferFatPointersPass (TM));
1799+
17761800 Base::addCodeGenPrepare (addPass);
17771801
1802+ if (isPassEnabled (EnableLoadStoreVectorizer))
1803+ addPass (LoadStoreVectorizerPass ());
1804+
17781805 // LowerSwitch pass may introduce unreachable blocks that can cause unexpected
17791806 // behavior for subsequent passes. Placing it here seems better as these
17801807 // blocks would get cleaned up by UnreachableBlockElim inserted next in the
0 commit comments