2121#include " llvm/Transforms/Utils/LCSSA.h"
2222#include " llvm/Transforms/Utils/LowerSwitch.h"
2323#include " llvm/Transforms/Utils/UnifyLoopExits.h"
24+ #include " llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
2425
2526using namespace llvm ;
27+ using namespace llvm ::AMDGPU;
2628
2729AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder (
2830 GCNTargetMachine &TM, const CGPassBuilderOption &Opts,
@@ -37,8 +39,35 @@ AMDGPUCodeGenPassBuilder::AMDGPUCodeGenPassBuilder(
3739}
3840
3941void AMDGPUCodeGenPassBuilder::addCodeGenPrepare (AddIRPass &addPass) const {
42+ // AMDGPUAnnotateKernelFeaturesPass is missing here, but it will hopefully be
43+ // deleted soon.
44+
45+ if (EnableLowerKernelArguments)
46+ addPass (AMDGPULowerKernelArgumentsPass (TM));
47+
48+ // This lowering has been placed after codegenprepare to take advantage of
49+ // address mode matching (which is why it isn't put with the LDS lowerings).
50+ // It could be placed anywhere before uniformity annotations (an analysis
51+ // that it changes by splitting up fat pointers into their components)
52+ // but has been put before switch lowering and CFG flattening so that those
53+ // passes can run on the more optimized control flow this pass creates in
54+ // many cases.
55+ //
56+ // FIXME: This should ideally be put after the LoadStoreVectorizer.
57+ // However, due to some annoying facts about ResourceUsageAnalysis,
58+ // (especially as exercised in the resource-usage-dead-function test),
59+ // we need all the function passes from codegenprepare all the way through
60+ // said resource usage analysis to run on the call graph produced
61+ // before codegenprepare runs (because codegenprepare will knock some
62+ // nodes out of the graph, which leads to function-level passes not
63+ // being run on them, which causes crashes in the resource usage analysis).
64+ addPass (AMDGPULowerBufferFatPointersPass (TM));
65+
4066 Base::addCodeGenPrepare (addPass);
4167
68+ if (isPassEnabled (EnableLoadStoreVectorizer))
69+ addPass (LoadStoreVectorizerPass ());
70+
4271 // LowerSwitch pass may introduce unreachable blocks that can cause unexpected
4372 // behavior for subsequent passes. Placing it here seems better that these
4473 // blocks would get cleaned up by UnreachableBlockElim inserted next in the
@@ -106,3 +135,12 @@ Error AMDGPUCodeGenPassBuilder::addInstSelector(AddMachinePass &addPass) const {
106135 addPass (SILowerI1CopiesPass ());
107136 return Error::success ();
108137}
138+
139+ bool AMDGPUCodeGenPassBuilder::isPassEnabled (const cl::opt<bool > &Opt,
140+ CodeGenOptLevel Level) const {
141+ if (Opt.getNumOccurrences ())
142+ return Opt;
143+ if (TM.getOptLevel () < Level)
144+ return false ;
145+ return Opt;
146+ }
0 commit comments