1919#include " llvm/Analysis/AssumptionCache.h"
2020#include " llvm/Analysis/BasicAliasAnalysis.h"
2121#include " llvm/Analysis/ConstantFolding.h"
22+ #include " llvm/Analysis/DomTreeUpdater.h"
2223#include " llvm/Analysis/GlobalsModRef.h"
2324#include " llvm/Analysis/TargetLibraryInfo.h"
2425#include " llvm/Analysis/TargetTransformInfo.h"
2829#include " llvm/IR/Function.h"
2930#include " llvm/IR/IRBuilder.h"
3031#include " llvm/IR/PatternMatch.h"
32+ #include " llvm/Transforms/Utils/BasicBlockUtils.h"
3133#include " llvm/Transforms/Utils/BuildLibCalls.h"
3234#include " llvm/Transforms/Utils/Local.h"
3335
@@ -396,54 +398,6 @@ static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) {
396398 return true ;
397399}
398400
399- // / Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
400- // / pessimistic codegen that has to account for setting errno and can enable
401- // / vectorization.
402- static bool foldSqrt (Instruction &I, TargetTransformInfo &TTI,
403- TargetLibraryInfo &TLI, AssumptionCache &AC,
404- DominatorTree &DT) {
405- // Match a call to sqrt mathlib function.
406- auto *Call = dyn_cast<CallInst>(&I);
407- if (!Call)
408- return false ;
409-
410- Module *M = Call->getModule ();
411- LibFunc Func;
412- if (!TLI.getLibFunc (*Call, Func) || !isLibFuncEmittable (M, &TLI, Func))
413- return false ;
414-
415- if (Func != LibFunc_sqrt && Func != LibFunc_sqrtf && Func != LibFunc_sqrtl)
416- return false ;
417-
418- // If (1) this is a sqrt libcall, (2) we can assume that NAN is not created
419- // (because NNAN or the operand arg must not be less than -0.0) and (2) we
420- // would not end up lowering to a libcall anyway (which could change the value
421- // of errno), then:
422- // (1) errno won't be set.
423- // (2) it is safe to convert this to an intrinsic call.
424- Type *Ty = Call->getType ();
425- Value *Arg = Call->getArgOperand (0 );
426- if (TTI.haveFastSqrt (Ty) &&
427- (Call->hasNoNaNs () ||
428- cannotBeOrderedLessThanZero (Arg, M->getDataLayout (), &TLI, 0 , &AC, &I,
429- &DT))) {
430- IRBuilder<> Builder (&I);
431- IRBuilderBase::FastMathFlagGuard Guard (Builder);
432- Builder.setFastMathFlags (Call->getFastMathFlags ());
433-
434- Function *Sqrt = Intrinsic::getDeclaration (M, Intrinsic::sqrt, Ty);
435- Value *NewSqrt = Builder.CreateCall (Sqrt, Arg, " sqrt" );
436- I.replaceAllUsesWith (NewSqrt);
437-
438- // Explicitly erase the old call because a call with side effects is not
439- // trivially dead.
440- I.eraseFromParent ();
441- return true ;
442- }
443-
444- return false ;
445- }
446-
447401// Check if this array of constants represents a cttz table.
448402// Iterate over the elements from \p Table by trying to find/match all
449403// the numbers from 0 to \p InputBits that should represent cttz results.
@@ -915,13 +869,159 @@ static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) {
915869 return true ;
916870}
917871
872+ // / Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
873+ // / pessimistic codegen that has to account for setting errno and can enable
874+ // / vectorization.
875+ static bool foldSqrt (CallInst *Call, TargetTransformInfo &TTI,
876+ TargetLibraryInfo &TLI, AssumptionCache &AC,
877+ DominatorTree &DT) {
878+ Module *M = Call->getModule ();
879+
880+ // If (1) this is a sqrt libcall, (2) we can assume that NAN is not created
881+ // (because NNAN or the operand arg must not be less than -0.0) and (2) we
882+ // would not end up lowering to a libcall anyway (which could change the value
883+ // of errno), then:
884+ // (1) errno won't be set.
885+ // (2) it is safe to convert this to an intrinsic call.
886+ Type *Ty = Call->getType ();
887+ Value *Arg = Call->getArgOperand (0 );
888+ if (TTI.haveFastSqrt (Ty) &&
889+ (Call->hasNoNaNs () ||
890+ cannotBeOrderedLessThanZero (Arg, M->getDataLayout (), &TLI, 0 , &AC, Call,
891+ &DT))) {
892+ IRBuilder<> Builder (Call);
893+ IRBuilderBase::FastMathFlagGuard Guard (Builder);
894+ Builder.setFastMathFlags (Call->getFastMathFlags ());
895+
896+ Function *Sqrt = Intrinsic::getDeclaration (M, Intrinsic::sqrt, Ty);
897+ Value *NewSqrt = Builder.CreateCall (Sqrt, Arg, " sqrt" );
898+ Call->replaceAllUsesWith (NewSqrt);
899+
900+ // Explicitly erase the old call because a call with side effects is not
901+ // trivially dead.
902+ Call->eraseFromParent ();
903+ return true ;
904+ }
905+
906+ return false ;
907+ }
908+
909+ // / Try to expand strcmp(P, "x") calls.
910+ static bool expandStrcmp (CallInst *CI, DominatorTree &DT, bool &MadeCFGChange) {
911+ Value *Str1P = CI->getArgOperand (0 ), *Str2P = CI->getArgOperand (1 );
912+
913+ // Trivial cases are optimized during inst combine
914+ if (Str1P == Str2P)
915+ return false ;
916+
917+ StringRef Str1, Str2;
918+ bool HasStr1 = getConstantStringInfo (Str1P, Str1);
919+ bool HasStr2 = getConstantStringInfo (Str2P, Str2);
920+
921+ Value *NonConstantP = nullptr ;
922+ StringRef ConstantStr;
923+
924+ if (!HasStr1 && HasStr2 && Str2.size () == 1 ) {
925+ NonConstantP = Str1P;
926+ ConstantStr = Str2;
927+ } else if (!HasStr2 && HasStr1 && Str1.size () == 1 ) {
928+ NonConstantP = Str2P;
929+ ConstantStr = Str1;
930+ } else {
931+ return false ;
932+ }
933+
934+ // Check if strcmp result is only used in a comparison with zero
935+ if (!isOnlyUsedInZeroComparison (CI))
936+ return false ;
937+
938+ // For strcmp(P, "x") do the following transformation:
939+ //
940+ // (before)
941+ // dst = strcmp(P, "x")
942+ //
943+ // (after)
944+ // v0 = P[0] - 'x'
945+ // [if v0 == 0]
946+ // v1 = P[1]
947+ // dst = phi(v0, v1)
948+ //
949+
950+ IRBuilder<> B (CI->getParent ());
951+ DomTreeUpdater DTU (DT, DomTreeUpdater::UpdateStrategy::Lazy);
952+
953+ Type *RetType = CI->getType ();
954+
955+ B.SetInsertPoint (CI);
956+ BasicBlock *InitialBB = B.GetInsertBlock ();
957+ Value *Str1FirstCharacterValue =
958+ B.CreateZExt (B.CreateLoad (B.getInt8Ty (), NonConstantP), RetType);
959+ Value *Str2FirstCharacterValue =
960+ ConstantInt::get (RetType, static_cast <unsigned char >(ConstantStr[0 ]));
961+ Value *FirstCharacterSub =
962+ B.CreateNSWSub (Str1FirstCharacterValue, Str2FirstCharacterValue);
963+ Value *IsFirstCharacterSubZero =
964+ B.CreateICmpEQ (FirstCharacterSub, ConstantInt::get (RetType, 0 ));
965+ Instruction *IsFirstCharacterSubZeroBBTerminator = SplitBlockAndInsertIfThen (
966+ IsFirstCharacterSubZero, CI, /* Unreachable*/ false ,
967+ /* BranchWeights*/ nullptr , &DTU);
968+
969+ B.SetInsertPoint (IsFirstCharacterSubZeroBBTerminator);
970+ B.GetInsertBlock ()->setName (" strcmp_expand_sub_is_zero" );
971+ BasicBlock *IsFirstCharacterSubZeroBB = B.GetInsertBlock ();
972+ Value *Str1SecondCharacterValue = B.CreateZExt (
973+ B.CreateLoad (B.getInt8Ty (), B.CreateConstInBoundsGEP1_64 (
974+ B.getInt8Ty (), NonConstantP, 1 )),
975+ RetType);
976+
977+ B.SetInsertPoint (CI);
978+ B.GetInsertBlock ()->setName (" strcmp_expand_sub_join" );
979+
980+ PHINode *Result = B.CreatePHI (RetType, 2 );
981+ Result->addIncoming (FirstCharacterSub, InitialBB);
982+ Result->addIncoming (Str1SecondCharacterValue, IsFirstCharacterSubZeroBB);
983+
984+ CI->replaceAllUsesWith (Result);
985+ CI->eraseFromParent ();
986+
987+ MadeCFGChange = true ;
988+
989+ return true ;
990+ }
991+
992+ static bool foldLibraryCalls (Instruction &I, TargetTransformInfo &TTI,
993+ TargetLibraryInfo &TLI, DominatorTree &DT,
994+ AssumptionCache &AC, bool &MadeCFGChange) {
995+ CallInst *CI = dyn_cast<CallInst>(&I);
996+ if (!CI)
997+ return false ;
998+
999+ LibFunc Func;
1000+ Module *M = I.getModule ();
1001+ if (!TLI.getLibFunc (*CI, Func) || !isLibFuncEmittable (M, &TLI, Func))
1002+ return false ;
1003+
1004+ switch (Func) {
1005+ case LibFunc_sqrt:
1006+ case LibFunc_sqrtf:
1007+ case LibFunc_sqrtl:
1008+ return foldSqrt (CI, TTI, TLI, AC, DT);
1009+ case LibFunc_strcmp:
1010+ return expandStrcmp (CI, DT, MadeCFGChange);
1011+ default :
1012+ break ;
1013+ }
1014+
1015+ return false ;
1016+ }
1017+
9181018// / This is the entry point for folds that could be implemented in regular
9191019// / InstCombine, but they are separated because they are not expected to
9201020// / occur frequently and/or have more than a constant-length pattern match.
9211021static bool foldUnusualPatterns (Function &F, DominatorTree &DT,
9221022 TargetTransformInfo &TTI,
9231023 TargetLibraryInfo &TLI, AliasAnalysis &AA,
924- AssumptionCache &AC) {
1024+ AssumptionCache &AC, bool &MadeCFGChange ) {
9251025 bool MadeChange = false ;
9261026 for (BasicBlock &BB : F) {
9271027 // Ignore unreachable basic blocks.
@@ -946,7 +1046,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
9461046 // NOTE: This function introduces erasing of the instruction `I`, so it
9471047 // needs to be called at the end of this sequence, otherwise we may make
9481048 // bugs.
949- MadeChange |= foldSqrt (I, TTI, TLI, AC, DT );
1049+ MadeChange |= foldLibraryCalls (I, TTI, TLI, DT, AC, MadeCFGChange );
9501050 }
9511051 }
9521052
@@ -962,12 +1062,12 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
9621062// / handled in the callers of this function.
9631063static bool runImpl (Function &F, AssumptionCache &AC, TargetTransformInfo &TTI,
9641064 TargetLibraryInfo &TLI, DominatorTree &DT,
965- AliasAnalysis &AA) {
1065+ AliasAnalysis &AA, bool &ChangedCFG ) {
9661066 bool MadeChange = false ;
9671067 const DataLayout &DL = F.getParent ()->getDataLayout ();
9681068 TruncInstCombine TIC (AC, TLI, DL, DT);
9691069 MadeChange |= TIC.run (F);
970- MadeChange |= foldUnusualPatterns (F, DT, TTI, TLI, AA, AC);
1070+ MadeChange |= foldUnusualPatterns (F, DT, TTI, TLI, AA, AC, ChangedCFG );
9711071 return MadeChange;
9721072}
9731073
@@ -978,12 +1078,21 @@ PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
9781078 auto &DT = AM.getResult <DominatorTreeAnalysis>(F);
9791079 auto &TTI = AM.getResult <TargetIRAnalysis>(F);
9801080 auto &AA = AM.getResult <AAManager>(F);
981- if (!runImpl (F, AC, TTI, TLI, DT, AA)) {
1081+
1082+ bool MadeCFGChange = false ;
1083+
1084+ if (!runImpl (F, AC, TTI, TLI, DT, AA, MadeCFGChange)) {
9821085 // No changes, all analyses are preserved.
9831086 return PreservedAnalyses::all ();
9841087 }
1088+
9851089 // Mark all the analyses that instcombine updates as preserved.
9861090 PreservedAnalyses PA;
987- PA.preserveSet <CFGAnalyses>();
1091+
1092+ if (MadeCFGChange)
1093+ PA.preserve <DominatorTreeAnalysis>();
1094+ else
1095+ PA.preserveSet <CFGAnalyses>();
1096+
9881097 return PA;
9891098}
0 commit comments