1919#include " llvm/Analysis/AssumptionCache.h"
2020#include " llvm/Analysis/BasicAliasAnalysis.h"
2121#include " llvm/Analysis/ConstantFolding.h"
22- #include " llvm/Analysis/DomTreeUpdater.h"
2322#include " llvm/Analysis/GlobalsModRef.h"
2423#include " llvm/Analysis/TargetLibraryInfo.h"
2524#include " llvm/Analysis/TargetTransformInfo.h"
2928#include " llvm/IR/Function.h"
3029#include " llvm/IR/IRBuilder.h"
3130#include " llvm/IR/PatternMatch.h"
32- #include " llvm/Transforms/Utils/BasicBlockUtils.h"
3331#include " llvm/Transforms/Utils/BuildLibCalls.h"
3432#include " llvm/Transforms/Utils/Local.h"
3533
@@ -398,6 +396,54 @@ static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) {
398396 return true ;
399397}
400398
399+ // / Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
400+ // / pessimistic codegen that has to account for setting errno and can enable
401+ // / vectorization.
402+ static bool foldSqrt (Instruction &I, TargetTransformInfo &TTI,
403+ TargetLibraryInfo &TLI, AssumptionCache &AC,
404+ DominatorTree &DT) {
405+ // Match a call to sqrt mathlib function.
406+ auto *Call = dyn_cast<CallInst>(&I);
407+ if (!Call)
408+ return false ;
409+
410+ Module *M = Call->getModule ();
411+ LibFunc Func;
412+ if (!TLI.getLibFunc (*Call, Func) || !isLibFuncEmittable (M, &TLI, Func))
413+ return false ;
414+
415+ if (Func != LibFunc_sqrt && Func != LibFunc_sqrtf && Func != LibFunc_sqrtl)
416+ return false ;
417+
418+ // If (1) this is a sqrt libcall, (2) we can assume that NAN is not created
419+ // (because NNAN or the operand arg must not be less than -0.0) and (2) we
420+ // would not end up lowering to a libcall anyway (which could change the value
421+ // of errno), then:
422+ // (1) errno won't be set.
423+ // (2) it is safe to convert this to an intrinsic call.
424+ Type *Ty = Call->getType ();
425+ Value *Arg = Call->getArgOperand (0 );
426+ if (TTI.haveFastSqrt (Ty) &&
427+ (Call->hasNoNaNs () ||
428+ cannotBeOrderedLessThanZero (Arg, M->getDataLayout (), &TLI, 0 , &AC, &I,
429+ &DT))) {
430+ IRBuilder<> Builder (&I);
431+ IRBuilderBase::FastMathFlagGuard Guard (Builder);
432+ Builder.setFastMathFlags (Call->getFastMathFlags ());
433+
434+ Function *Sqrt = Intrinsic::getDeclaration (M, Intrinsic::sqrt, Ty);
435+ Value *NewSqrt = Builder.CreateCall (Sqrt, Arg, " sqrt" );
436+ I.replaceAllUsesWith (NewSqrt);
437+
438+ // Explicitly erase the old call because a call with side effects is not
439+ // trivially dead.
440+ I.eraseFromParent ();
441+ return true ;
442+ }
443+
444+ return false ;
445+ }
446+
401447// Check if this array of constants represents a cttz table.
402448// Iterate over the elements from \p Table by trying to find/match all
403449// the numbers from 0 to \p InputBits that should represent cttz results.
@@ -869,199 +915,13 @@ static bool foldPatternedLoads(Instruction &I, const DataLayout &DL) {
869915 return true ;
870916}
871917
872- // / Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
873- // / pessimistic codegen that has to account for setting errno and can enable
874- // / vectorization.
875- static bool foldSqrt (CallInst *Call, TargetTransformInfo &TTI,
876- TargetLibraryInfo &TLI, AssumptionCache &AC,
877- DominatorTree &DT) {
878- Module *M = Call->getModule ();
879-
880- // If (1) this is a sqrt libcall, (2) we can assume that NAN is not created
881- // (because NNAN or the operand arg must not be less than -0.0) and (2) we
882- // would not end up lowering to a libcall anyway (which could change the value
883- // of errno), then:
884- // (1) errno won't be set.
885- // (2) it is safe to convert this to an intrinsic call.
886- Type *Ty = Call->getType ();
887- Value *Arg = Call->getArgOperand (0 );
888- if (TTI.haveFastSqrt (Ty) &&
889- (Call->hasNoNaNs () ||
890- cannotBeOrderedLessThanZero (Arg, M->getDataLayout (), &TLI, 0 , &AC, Call,
891- &DT))) {
892- IRBuilder<> Builder (Call);
893- IRBuilderBase::FastMathFlagGuard Guard (Builder);
894- Builder.setFastMathFlags (Call->getFastMathFlags ());
895-
896- Function *Sqrt = Intrinsic::getDeclaration (M, Intrinsic::sqrt, Ty);
897- Value *NewSqrt = Builder.CreateCall (Sqrt, Arg, " sqrt" );
898- Call->replaceAllUsesWith (NewSqrt);
899-
900- // Explicitly erase the old call because a call with side effects is not
901- // trivially dead.
902- Call->eraseFromParent ();
903- return true ;
904- }
905-
906- return false ;
907- }
908-
909- // / Try to expand strcmp(P, string_literal) where string_literal size is 1 or 2
910- static bool expandStrcmp (CallInst *CI, DominatorTree &DT, bool &MadeCFGChange) {
911- Value *Str1P = CI->getArgOperand (0 ), *Str2P = CI->getArgOperand (1 );
912-
913- // Trivial cases are optimized during inst combine
914- if (Str1P == Str2P)
915- return false ;
916-
917- StringRef Str1, Str2;
918- bool HasStr1 = getConstantStringInfo (Str1P, Str1);
919- bool HasStr2 = getConstantStringInfo (Str2P, Str2);
920-
921- Value *NonConstantP = nullptr ;
922- StringRef ConstantStr;
923-
924- if (!HasStr1 && HasStr2) {
925- NonConstantP = Str1P;
926- ConstantStr = Str2;
927- } else if (!HasStr2 && HasStr1) {
928- NonConstantP = Str2P;
929- ConstantStr = Str1;
930- } else {
931- return false ;
932- }
933-
934- size_t ConstantStrSize = ConstantStr.size ();
935-
936- // Trivial cases are optimized during inst combine
937- if (ConstantStrSize == 0 || ConstantStrSize > 2 )
938- return false ;
939-
940- // Check if strcmp result is only used in a comparison with zero
941- if (!isOnlyUsedInZeroComparison (CI))
942- return false ;
943-
944- // For strcmp(P, "x") do the following transformation:
945- //
946- // (before)
947- // dst = strcmp(P, "x")
948- //
949- // (after)
950- // v0 = P[0] - 'x'
951- // [if v0 == 0]
952- // v1 = P[1]
953- // dst = phi(v0, v1)
954- //
955- // For strcmp(P, "xy") do the following transformation:
956- //
957- // (before)
958- // dst = strcmp(P, "xy")
959- //
960- // (after)
961- // v0 = P[0] - 'x'
962- // [if v0 == 0]
963- // v1 = P[1] - 'y'
964- // [if v1 == 0]
965- // v2 = P[2]
966- // dst = phi(v0, v1, v2)
967- //
968-
969- IRBuilder<> B (CI->getParent ());
970- DomTreeUpdater DTU (DT, DomTreeUpdater::UpdateStrategy::Lazy);
971-
972- Type *RetType = CI->getType ();
973-
974- BasicBlock *InitialBB = CI->getParent ();
975- BasicBlock *JoinBlock = SplitBlock (InitialBB, CI, &DTU);
976- JoinBlock->setName (" strcmp_expand_sub_join" );
977-
978- B.SetInsertPoint (CI);
979- PHINode *ResultPHI = B.CreatePHI (RetType, ConstantStrSize + 1 );
980-
981- B.SetInsertPoint (InitialBB);
982- InitialBB->getTerminator ()->eraseFromParent ();
983-
984- SmallVector<DominatorTree::UpdateType, 4 > DTUpdates;
985-
986- size_t CharacterIndexToCheck = 0 ;
987- for (; CharacterIndexToCheck < ConstantStrSize; ++CharacterIndexToCheck) {
988- Value *StrCharacterValue = B.CreateZExt (
989- B.CreateLoad (B.getInt8Ty (),
990- B.CreateConstInBoundsGEP1_64 (B.getInt8Ty (), NonConstantP,
991- CharacterIndexToCheck)),
992- RetType);
993- Value *ConstantStrCharacterValue = ConstantInt::get (
994- RetType,
995- static_cast <unsigned char >(ConstantStr[CharacterIndexToCheck]));
996- Value *CharacterSub =
997- B.CreateNSWSub (StrCharacterValue, ConstantStrCharacterValue);
998- Value *CharacterSubIsZero =
999- B.CreateICmpEQ (CharacterSub, ConstantInt::get (RetType, 0 ));
1000- BasicBlock *CharacterSubIsZeroBB =
1001- BasicBlock::Create (B.getContext (), " strcmp_expand_sub_is_zero" ,
1002- InitialBB->getParent (), JoinBlock);
1003- B.CreateCondBr (CharacterSubIsZero, CharacterSubIsZeroBB, JoinBlock);
1004-
1005- ResultPHI->addIncoming (CharacterSub, B.GetInsertBlock ());
1006- DTUpdates.emplace_back (DominatorTree::Insert, B.GetInsertBlock (),
1007- CharacterSubIsZeroBB);
1008-
1009- B.SetInsertPoint (CharacterSubIsZeroBB);
1010- DTUpdates.emplace_back (DominatorTree::Insert, CharacterSubIsZeroBB,
1011- JoinBlock);
1012- }
1013-
1014- Value *StrLastCharacterValue = B.CreateZExt (
1015- B.CreateLoad (B.getInt8Ty (),
1016- B.CreateConstInBoundsGEP1_64 (B.getInt8Ty (), NonConstantP,
1017- CharacterIndexToCheck)),
1018- RetType);
1019- ResultPHI->addIncoming (StrLastCharacterValue, B.GetInsertBlock ());
1020- B.CreateBr (JoinBlock);
1021-
1022- DTU.applyUpdates (DTUpdates);
1023-
1024- CI->replaceAllUsesWith (ResultPHI);
1025- CI->eraseFromParent ();
1026-
1027- MadeCFGChange = true ;
1028-
1029- return true ;
1030- }
1031-
1032- static bool foldLibraryCalls (Instruction &I, TargetTransformInfo &TTI,
1033- TargetLibraryInfo &TLI, DominatorTree &DT,
1034- AssumptionCache &AC, bool &MadeCFGChange) {
1035- CallInst *CI = dyn_cast<CallInst>(&I);
1036- if (!CI)
1037- return false ;
1038-
1039- LibFunc Func;
1040- Module *M = I.getModule ();
1041- if (!TLI.getLibFunc (*CI, Func) || !isLibFuncEmittable (M, &TLI, Func))
1042- return false ;
1043-
1044- switch (Func) {
1045- case LibFunc_sqrt:
1046- case LibFunc_sqrtf:
1047- case LibFunc_sqrtl:
1048- return foldSqrt (CI, TTI, TLI, AC, DT);
1049- case LibFunc_strcmp:
1050- return expandStrcmp (CI, DT, MadeCFGChange);
1051- default :
1052- break ;
1053- }
1054-
1055- return false ;
1056- }
1057-
1058918// / This is the entry point for folds that could be implemented in regular
1059919// / InstCombine, but they are separated because they are not expected to
1060920// / occur frequently and/or have more than a constant-length pattern match.
1061921static bool foldUnusualPatterns (Function &F, DominatorTree &DT,
1062922 TargetTransformInfo &TTI,
1063923 TargetLibraryInfo &TLI, AliasAnalysis &AA,
1064- AssumptionCache &AC, bool &MadeCFGChange ) {
924+ AssumptionCache &AC) {
1065925 bool MadeChange = false ;
1066926 for (BasicBlock &BB : F) {
1067927 // Ignore unreachable basic blocks.
@@ -1086,7 +946,7 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
1086946 // NOTE: This function introduces erasing of the instruction `I`, so it
1087947 // needs to be called at the end of this sequence, otherwise we may make
1088948 // bugs.
1089- MadeChange |= foldLibraryCalls (I, TTI, TLI, DT, AC, MadeCFGChange );
949+ MadeChange |= foldSqrt (I, TTI, TLI, AC, DT );
1090950 }
1091951 }
1092952
@@ -1102,12 +962,12 @@ static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
1102962// / handled in the callers of this function.
1103963static bool runImpl (Function &F, AssumptionCache &AC, TargetTransformInfo &TTI,
1104964 TargetLibraryInfo &TLI, DominatorTree &DT,
1105- AliasAnalysis &AA, bool &ChangedCFG ) {
965+ AliasAnalysis &AA) {
1106966 bool MadeChange = false ;
1107967 const DataLayout &DL = F.getParent ()->getDataLayout ();
1108968 TruncInstCombine TIC (AC, TLI, DL, DT);
1109969 MadeChange |= TIC.run (F);
1110- MadeChange |= foldUnusualPatterns (F, DT, TTI, TLI, AA, AC, ChangedCFG );
970+ MadeChange |= foldUnusualPatterns (F, DT, TTI, TLI, AA, AC);
1111971 return MadeChange;
1112972}
1113973
@@ -1118,21 +978,12 @@ PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
1118978 auto &DT = AM.getResult <DominatorTreeAnalysis>(F);
1119979 auto &TTI = AM.getResult <TargetIRAnalysis>(F);
1120980 auto &AA = AM.getResult <AAManager>(F);
1121-
1122- bool MadeCFGChange = false ;
1123-
1124- if (!runImpl (F, AC, TTI, TLI, DT, AA, MadeCFGChange)) {
981+ if (!runImpl (F, AC, TTI, TLI, DT, AA)) {
1125982 // No changes, all analyses are preserved.
1126983 return PreservedAnalyses::all ();
1127984 }
1128-
1129985 // Mark all the analyses that instcombine updates as preserved.
1130986 PreservedAnalyses PA;
1131-
1132- if (MadeCFGChange)
1133- PA.preserve <DominatorTreeAnalysis>();
1134- else
1135- PA.preserveSet <CFGAnalyses>();
1136-
987+ PA.preserveSet <CFGAnalyses>();
1137988 return PA;
1138989}
0 commit comments