@@ -54,12 +54,12 @@ static bool isUsedAsMemCpySource(const Value *V, bool &OtherUse) {
5454}
5555
5656static void countNumMemAccesses (const Value *Ptr, unsigned &NumStores,
57- unsigned &NumLoads, const Function *F = nullptr ) {
57+ unsigned &NumLoads, const Function *F) {
5858 if (!isa<PointerType>(Ptr->getType ()))
5959 return ;
6060 for (const User *U : Ptr->users ())
6161 if (const Instruction *User = dyn_cast<Instruction>(U)) {
62- if (User->getParent ()->getParent () == F || !F ) {
62+ if (User->getParent ()->getParent () == F) {
6363 if (const auto *SI = dyn_cast<StoreInst>(User)) {
6464 if (SI->getPointerOperand () == Ptr && !SI->isVolatile ())
6565 NumStores++;
@@ -70,220 +70,68 @@ static void countNumMemAccesses(const Value *Ptr, unsigned &NumStores,
7070 }
7171 else if (const auto *GEP = dyn_cast<GetElementPtrInst>(User)) {
7272 if (GEP->getPointerOperand () == Ptr)
73- countNumMemAccesses (GEP, NumStores, NumLoads);
73+ countNumMemAccesses (GEP, NumStores, NumLoads, F );
7474 }
7575 }
7676 }
7777}
7878
79- static unsigned usesAroundCall (const CallBase *CB, const GlobalVariable *GV) {
80- unsigned Uses = 0 ;
81- std::set<const Value *> Ptrs;
82- Ptrs.insert (GV);
83-
84- const BasicBlock *BB = CB->getParent ();
85- const unsigned CutOff = 20 ;
86- BasicBlock::const_iterator II = CB->getIterator ();
87- for (unsigned N = 0 ; N < CutOff && II != BB->begin (); N++)
88- II--;
89- BasicBlock::const_iterator EE = CB->getIterator ();
90- for (unsigned N = 0 ; N < CutOff && EE != BB->end (); N++)
91- EE++;
92-
93- for (; II != EE; ++II) {
94- if (const auto *SI = dyn_cast<StoreInst>(II)) {
95- if (Ptrs.count (SI->getPointerOperand ()) && !SI->isVolatile ())
96- Uses++;
97- }
98- else if (const auto *LI = dyn_cast<LoadInst>(II)) {
99- if (Ptrs.count (LI->getPointerOperand ()) && !LI->isVolatile ())
100- Uses++;
101- }
102- else if (const auto *GEP = dyn_cast<GetElementPtrInst>(II)) {
103- if (Ptrs.count (GEP->getPointerOperand ()))
104- Ptrs.insert (GEP);
105- }
106- }
107- return Uses;
108- }
109-
110- static unsigned usesEntryExit (const Function *F, const GlobalVariable *GV) {
111- unsigned Uses = 0 ;
112- std::set<const Value *> Ptrs;
113- Ptrs.insert (GV);
114-
115- const unsigned CutOff = 100 ;
116- const BasicBlock *BB = &F->getEntryBlock ();
117- unsigned N = 0 ;
118- for (BasicBlock::const_iterator II = BB->begin ();
119- II != BB->end () && N < CutOff; ++II, N++) {
120- if (const auto *SI = dyn_cast<StoreInst>(II)) {
121- if (Ptrs.count (SI->getPointerOperand ()) && !SI->isVolatile ())
122- Uses++;
123- }
124- else if (const auto *LI = dyn_cast<LoadInst>(II)) {
125- if (Ptrs.count (LI->getPointerOperand ()) && !LI->isVolatile ())
126- Uses++;
127- }
128- else if (const auto *GEP = dyn_cast<GetElementPtrInst>(II)) {
129- if (Ptrs.count (GEP->getPointerOperand ()))
130- Ptrs.insert (GEP);
131- }
132- }
133-
134- Ptrs.clear ();
135- Ptrs.insert (GV);
136- unsigned ReturnBlockUses = 0 ;
137- unsigned NumReturnBlocks = 0 ;
138- for (auto &BBII : *F) {
139- if (isa<ReturnInst>(BBII.getTerminator ())) {
140- if (NumReturnBlocks++ > 0 ) {
141- ReturnBlockUses = 0 ;
142- break ;
143- }
144- BasicBlock::const_iterator EE = BBII.getTerminator ()->getIterator ();
145- BasicBlock::const_iterator II = EE;
146- for (unsigned N = 0 ; N < CutOff && II != BBII.begin (); N++)
147- II--;
148- for (; II != EE; ++II) {
149- if (const auto *SI = dyn_cast<StoreInst>(II)) {
150- if (Ptrs.count (SI->getPointerOperand ()) && !SI->isVolatile ())
151- ReturnBlockUses++;
152- }
153- else if (const auto *LI = dyn_cast<LoadInst>(II)) {
154- if (Ptrs.count (LI->getPointerOperand ()) && !LI->isVolatile ())
155- ReturnBlockUses++;
156- }
157- else if (const auto *GEP = dyn_cast<GetElementPtrInst>(II)) {
158- if (Ptrs.count (GEP->getPointerOperand ()))
159- Ptrs.insert (GEP);
160- }
161- }
162- }
163- }
164-
165- return Uses + ReturnBlockUses;
166- }
167-
16879unsigned SystemZTTIImpl::adjustInliningThreshold (const CallBase *CB) const {
16980 unsigned Bonus = 0 ;
170-
171-
172- // dbgs() << "INSTRCOUNT: " << CB->getCalledFunction()->getInstructionCount()
173- // << CB->getCalledFunction()->getName() << "\n";
174- // if (CB->getCalledFunction()->getInstructionCount() == 216)
175- // Bonus = 300;
176-
177- // if (Function *Callee = CB->getCalledFunction()) {
178- // const char *CallerFunName = CB->getParent()->getParent()->getName().data();
179- // const char *CalleeFunName = Callee->getName().data();
180-
181- // if (std::strcmp(CallerFunName , "S_regmatch") == 0) {
182- // if (std::strcmp(CalleeFunName, "S_reghopmaybe3") == 0 || // less important
183- // std::strcmp(CalleeFunName, "S_regcppop") == 0 ||
184- // std::strcmp(CalleeFunName, "S_regcppush") == 0)
185- // return 250;
186- // }
187- // }
188-
189- // Check inlining with memory accesses common to caller and callee
190- // - Around call in caller? entry/exit blocks in callee?
191- // - Globals used (much?) in both caller and callee
192- // - Specific type of pattern: load; inc/dec; store ?
193- // - non-volatile loads/stores?
194- // - int/fp loads/stores? ptr?
195- // - num occurences in caller?
196- // - or specifically 2+ functions inlined if many common accesses?
197- // - specifically 2+ functions getting same adress as argument (ptr)?
198- // - (ptr-args generally?)
199- if (const Function *Callee = CB->getCalledFunction ()) {
200- const Function *Caller = CB->getParent ()->getParent ();
201- const Module *M = Caller->getParent ();
202- std::set<const GlobalVariable *> CalleeGlobals;
203- std::set<const GlobalVariable *> CallerGlobals;
204- for (const GlobalVariable &Global : M->globals ())
205- for (const User *U : Global.users ())
206- if (const Instruction *User = dyn_cast<Instruction>(U)) {
207- if (User->getParent ()->getParent () == Callee)
208- CalleeGlobals.insert (&Global);
209- if (User->getParent ()->getParent () == Caller)
210- CallerGlobals.insert (&Global);
211- }
212-
213- for (auto *GV : CalleeGlobals)
214- if (CallerGlobals.count (GV)) {
215- unsigned CalleeStores = 0 , CalleeLoads = 0 ;
216- unsigned CallerStores = 0 , CallerLoads = 0 ;
217- countNumMemAccesses (GV, CalleeStores, CalleeLoads, Callee);
218- countNumMemAccesses (GV, CallerStores, CallerLoads, Caller);
219- if ((CalleeStores || CalleeLoads) && (CallerStores || CallerLoads)) {
220- // dbgs() << "GV: @" << GV->getName()
221- // << " " << *GV->getValueType()
222- // << " Callee: " << Callee->getName() << " S: " << CalleeStores
223- // << " L: " << CalleeLoads << " MEE: " << (CalleeStores + CalleeLoads)
224- // << " Callee-size: " << Callee->getInstructionCount()
225- // << " Caller: " << Caller->getName() << " S: " << CallerStores
226- // << " L: " << CallerLoads << " MER: " << (CallerStores + CallerLoads)
227- // << " Uses-around-call: " << usesAroundCall(CB, GV)
228- // << " Uses-entry-exit-callee: " << usesEntryExit(Callee, GV)
229- // << "\n";
230-
231- // const char *CallerFunName = CB->getParent()->getParent()->getName().data();
232- // const char *CalleeFunName = Callee->getName().data();
233- // if (std::strcmp(CallerFunName , "S_regmatch") == 0) {
234- // if (std::strcmp(CalleeFunName, "S_regcppop") == 0) {
235- // return 250;
236- // }
237- // if (std::strcmp(CalleeFunName, "S_regcppush") == 0) {
238- // return 250;
239- // }
240- if (// usesEntryExit(Callee, GV) >= 5 &&
241- Callee->getInstructionCount () < 250 &&
242-
243- // (CalleeStores >= 5 && CalleeLoads >= 5) &&
244- (CalleeStores + CalleeLoads) > 10 &&
245-
246- // CallerLoads > 25)
247- (CallerStores + CallerLoads) > 10 )
248- return 500 ;
249-
250- // if
251- // if ((CallerStores + CallerLoads) > 25)
252- // if (CallerLoads) > 25)
253-
254- // }
255- }
256- }
257- }
81+ const Function *Caller = CB->getParent ()->getParent ();
82+ const Function *Callee = CB->getCalledFunction ();
83+ if (!Callee)
84+ return 0 ;
85+ const Module *M = Caller->getParent ();
25886
25987 // Increase the threshold if an incoming argument is used only as a memcpy
26088 // source.
261- if (Function *Callee = CB-> getCalledFunction ())
262- for (Argument &Arg : Callee-> args ()) {
263- bool OtherUse = false ;
264- if ( isUsedAsMemCpySource (&Arg, OtherUse) && !OtherUse)
265- Bonus += 1000 ;
89+ for ( const Argument &Arg : Callee-> args ()) {
90+ bool OtherUse = false ;
91+ if ( isUsedAsMemCpySource (&Arg, OtherUse) && !OtherUse) {
92+ Bonus = 1000 ;
93+ break ;
26694 }
95+ }
26796
268- if (!Bonus) {
269- if (Function *Callee = CB->getCalledFunction ()) {
270- unsigned NumStores = 0 ;
271- unsigned NumLoads = 0 ;
272- for (unsigned OpIdx = 0 ; OpIdx != Callee->arg_size (); ++OpIdx) {
273- Value *CallerArg = CB->getArgOperand (OpIdx);
274- Argument *CalleeArg = Callee->getArg (OpIdx);
275- if (isa<AllocaInst>(CallerArg))
276- countNumMemAccesses (CalleeArg, NumStores, NumLoads);
97+ // Give bonus for globals used much in both caller and callee.
98+ std::set<const GlobalVariable *> CalleeGlobals;
99+ std::set<const GlobalVariable *> CallerGlobals;
100+ for (const GlobalVariable &Global : M->globals ())
101+ for (const User *U : Global.users ())
102+ if (const Instruction *User = dyn_cast<Instruction>(U)) {
103+ if (User->getParent ()->getParent () == Callee)
104+ CalleeGlobals.insert (&Global);
105+ if (User->getParent ()->getParent () == Caller)
106+ CallerGlobals.insert (&Global);
107+ }
108+ for (auto *GV : CalleeGlobals)
109+ if (CallerGlobals.count (GV)) {
110+ unsigned CalleeStores = 0 , CalleeLoads = 0 ;
111+ unsigned CallerStores = 0 , CallerLoads = 0 ;
112+ countNumMemAccesses (GV, CalleeStores, CalleeLoads, Callee);
113+ countNumMemAccesses (GV, CallerStores, CallerLoads, Caller);
114+ if ((CalleeStores + CalleeLoads) > 10 &&
115+ (CallerStores + CallerLoads) > 10 ) {
116+ Bonus = 1000 ;
117+ break ;
277118 }
278- // dbgs() << "NUM: " << NumStores << " " << NumLoads << "\n";
279- // Best on povray, but not doing stores slightly better on blender.
280- if (NumLoads > 10 )
281- Bonus += NumLoads * 50 ;
282- if (NumStores > 10 )
283- Bonus += NumStores * 50 ;
284- Bonus = std::min (Bonus, unsigned (1000 ));
285119 }
120+
121+ // Give bonus when Callee accesses an Alloca of Caller heavily.
122+ unsigned NumStores = 0 ;
123+ unsigned NumLoads = 0 ;
124+ for (unsigned OpIdx = 0 ; OpIdx != Callee->arg_size (); ++OpIdx) {
125+ Value *CallerArg = CB->getArgOperand (OpIdx);
126+ Argument *CalleeArg = Callee->getArg (OpIdx);
127+ if (isa<AllocaInst>(CallerArg))
128+ countNumMemAccesses (CalleeArg, NumStores, NumLoads, Callee);
286129 }
130+ if (NumLoads > 10 )
131+ Bonus += NumLoads * 50 ;
132+ if (NumStores > 10 )
133+ Bonus += NumStores * 50 ;
134+ Bonus = std::min (Bonus, unsigned (1000 ));
287135
288136 LLVM_DEBUG (if (Bonus)
289137 dbgs () << " ++ SZTTI Adding inlining bonus: " << Bonus << " \n " ;);
0 commit comments