@@ -2610,22 +2610,15 @@ void InnerLoopVectorizer::createInductionResumeValue(
26102610 assert (VectorTripCount && " Expected valid arguments" );
26112611
26122612 Instruction *OldInduction = Legal->getPrimaryInduction ();
2613- Value *EndValue = nullptr ;
26142613 Value *EndValueFromAdditionalBypass = AdditionalBypass.second ;
26152614 if (OrigPhi == OldInduction) {
2616- // We know what the end value is.
2617- EndValue = VectorTripCount;
26182615 } else {
26192616 IRBuilder<> B (LoopVectorPreHeader->getTerminator ());
26202617
26212618 // Fast-math-flags propagate from the original induction instruction.
26222619 if (isa_and_nonnull<FPMathOperator>(II.getInductionBinOp ()))
26232620 B.setFastMathFlags (II.getInductionBinOp ()->getFastMathFlags ());
26242621
2625- EndValue = emitTransformedIndex (B, VectorTripCount, II.getStartValue (),
2626- Step, II.getKind (), II.getInductionBinOp ());
2627- EndValue->setName (" ind.end" );
2628-
26292622 // Compute the end value for the additional bypass (if applicable).
26302623 if (AdditionalBypass.first ) {
26312624 B.SetInsertPoint (AdditionalBypass.first ,
@@ -2637,26 +2630,6 @@ void InnerLoopVectorizer::createInductionResumeValue(
26372630 }
26382631 }
26392632
2640- VPBasicBlock *MiddleVPBB =
2641- cast<VPBasicBlock>(Plan.getVectorLoopRegion ()->getSingleSuccessor ());
2642-
2643- VPBasicBlock *ScalarPHVPBB = nullptr ;
2644- if (MiddleVPBB->getNumSuccessors () == 2 ) {
2645- // Order is strict: first is the exit block, second is the scalar preheader.
2646- ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors ()[1 ]);
2647- } else {
2648- ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor ());
2649- }
2650-
2651- VPBuilder ScalarPHBuilder (ScalarPHVPBB);
2652- auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp (
2653- VPInstruction::ResumePhi,
2654- {Plan.getOrAddLiveIn (EndValue), Plan.getOrAddLiveIn (II.getStartValue ())},
2655- OrigPhi->getDebugLoc (), " bc.resume.val" );
2656-
2657- auto *ScalarLoopHeader =
2658- cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor ());
2659- addOperandToPhiInVPIRBasicBlock (ScalarLoopHeader, OrigPhi, ResumePhiRecipe);
26602633 InductionBypassValues[OrigPhi] = {AdditionalBypass.first ,
26612634 EndValueFromAdditionalBypass};
26622635}
@@ -7704,10 +7677,22 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
77047677 ILV.getOrCreateVectorTripCount (nullptr ),
77057678 CanonicalIVStartValue, State);
77067679
7680+ VPBasicBlock *MiddleVPBB =
7681+ cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion ()->getSingleSuccessor ());
7682+
7683+ VPBasicBlock *ScalarPHVPBB = nullptr ;
7684+ if (MiddleVPBB->getNumSuccessors () == 2 ) {
7685+ // Order is strict: first is the exit block, second is the scalar
7686+ // preheader.
7687+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors ()[1 ]);
7688+ } else {
7689+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor ());
7690+ }
7691+
77077692 BestVPlan.execute (&State);
77087693
77097694 // 2.5 Collect reduction resume values.
7710- auto *ExitVPBB =
7695+ VPBasicBlock *ExitVPBB =
77117696 cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion ()->getSingleSuccessor ());
77127697 for (VPRecipeBase &R : *ExitVPBB) {
77137698 createAndCollectMergePhiForReduction (
@@ -7992,6 +7977,7 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
79927977 // Generate a resume induction for the vector epilogue and put it in the
79937978 // vector epilogue preheader
79947979 Type *IdxTy = Legal->getWidestInductionType ();
7980+
79957981 PHINode *EPResumeVal = PHINode::Create (IdxTy, 2 , " vec.epilog.resume.val" );
79967982 EPResumeVal->insertBefore (LoopVectorPreHeader->getFirstNonPHIIt ());
79977983 EPResumeVal->addIncoming (EPI.VectorTripCount , VecEpilogueIterationCountCheck);
@@ -8879,6 +8865,74 @@ addUsersInExitBlock(VPlan &Plan,
88798865 }
88808866}
88818867
8868+ static void addResumeValuesForInductions (VPlan &Plan) {
8869+ VPTypeAnalysis TypeInfo (Plan.getCanonicalIV ()->getScalarType ());
8870+ VPBasicBlock *Header = Plan.getVectorLoopRegion ()->getEntryBasicBlock ();
8871+
8872+ VPBuilder Builder (
8873+ cast<VPBasicBlock>(Plan.getVectorLoopRegion ()->getSinglePredecessor ()));
8874+ for (VPRecipeBase &R : Header->phis ()) {
8875+ PHINode *OrigPhi;
8876+ const InductionDescriptor *ID;
8877+ VPValue *Start;
8878+ VPValue *Step;
8879+ Type *ScalarTy;
8880+ bool IsCanonical = false ;
8881+ if (auto *WideIV = dyn_cast<VPWidenIntOrFpInductionRecipe>(&R)) {
8882+ if (WideIV->getTruncInst ())
8883+ continue ;
8884+ OrigPhi = cast<PHINode>(WideIV->getUnderlyingValue ());
8885+ ID = &WideIV->getInductionDescriptor ();
8886+ Start = WideIV->getStartValue ();
8887+ Step = WideIV->getStepValue ();
8888+ ScalarTy = WideIV->getScalarType ();
8889+ IsCanonical = WideIV->isCanonical ();
8890+ } else if (auto *WideIV = dyn_cast<VPWidenPointerInductionRecipe>(&R)) {
8891+ OrigPhi = cast<PHINode>(WideIV->getUnderlyingValue ());
8892+ ID = &WideIV->getInductionDescriptor ();
8893+ Start = WideIV->getStartValue ();
8894+ Step = WideIV->getOperand (1 );
8895+ ScalarTy = Start->getLiveInIRValue ()->getType ();
8896+ } else {
8897+ continue ;
8898+ }
8899+
8900+ VPValue *EndValue = &Plan.getVectorTripCount ();
8901+ if (!IsCanonical) {
8902+ EndValue = Builder.createDerivedIV (
8903+ ID->getKind (),
8904+ dyn_cast_or_null<FPMathOperator>(ID->getInductionBinOp ()), Start,
8905+ &Plan.getVectorTripCount (), Step);
8906+ }
8907+
8908+ if (ScalarTy != TypeInfo.inferScalarType (EndValue)) {
8909+ EndValue =
8910+ Builder.createScalarCast (Instruction::Trunc, EndValue, ScalarTy);
8911+ }
8912+
8913+ VPBasicBlock *MiddleVPBB =
8914+ cast<VPBasicBlock>(Plan.getVectorLoopRegion ()->getSingleSuccessor ());
8915+
8916+ VPBasicBlock *ScalarPHVPBB = nullptr ;
8917+ if (MiddleVPBB->getNumSuccessors () == 2 ) {
8918+ // Order is strict: first is the exit block, second is the scalar
8919+ // preheader.
8920+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors ()[1 ]);
8921+ } else {
8922+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor ());
8923+ }
8924+
8925+ VPBuilder ScalarPHBuilder (ScalarPHVPBB);
8926+ auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp (
8927+ VPInstruction::ResumePhi, {EndValue, Start}, OrigPhi->getDebugLoc (),
8928+ " bc.resume.val" );
8929+
8930+ auto *ScalarLoopHeader =
8931+ cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor ());
8932+ addOperandToPhiInVPIRBasicBlock (ScalarLoopHeader, OrigPhi, ResumePhiRecipe);
8933+ }
8934+ }
8935+
88828936// / Handle live-outs for first order reductions, both in the scalar preheader
88838937// / and the original exit block:
88848938// / 1. Feed a resume value for every FOR from the vector loop to the scalar
@@ -9174,6 +9228,7 @@ LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(VFRange &Range) {
91749228 OrigLoop, RecipeBuilder, *Plan, Legal->getInductionVars ());
91759229 addLiveOutsForFirstOrderRecurrences (*Plan, ExitUsersToFix);
91769230 addUsersInExitBlock (*Plan, ExitUsersToFix);
9231+ addResumeValuesForInductions (*Plan);
91779232
91789233 // ---------------------------------------------------------------------------
91799234 // Transform initial VPlan: Apply previously taken decisions, in order, to
@@ -9279,6 +9334,7 @@ VPlanPtr LoopVectorizationPlanner::buildVPlan(VFRange &Range) {
92799334 bool HasNUW = true ;
92809335 addCanonicalIVRecipes (*Plan, Legal->getWidestInductionType (), HasNUW,
92819336 DebugLoc ());
9337+ addResumeValuesForInductions (*Plan);
92829338 assert (verifyVPlanIsValid (*Plan) && " VPlan is invalid" );
92839339 return Plan;
92849340}
@@ -9562,7 +9618,8 @@ void VPDerivedIVRecipe::execute(VPTransformState &State) {
95629618 State.Builder , CanonicalIV, getStartValue ()->getLiveInIRValue (), Step,
95639619 Kind, cast_if_present<BinaryOperator>(FPBinOp));
95649620 DerivedIV->setName (" offset.idx" );
9565- assert (DerivedIV != CanonicalIV && " IV didn't need transforming?" );
9621+ assert ((isa<Constant>(CanonicalIV) || DerivedIV != CanonicalIV) &&
9622+ " IV didn't need transforming?" );
95669623
95679624 State.set (this , DerivedIV, VPLane (0 ));
95689625}
@@ -10231,6 +10288,50 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1023110288 EPI, &LVL, &CM, BFI, PSI, Checks,
1023210289 *BestMainPlan);
1023310290
10291+ VPlan &BestEpiPlan = LVP.getPlanFor (EPI.EpilogueVF );
10292+ // Collect PHI nodes of wide inductions in the VPlan for the epilogue. Those will need their resume-values computed from the main vector loop. Others can be removed in the main VPlan.
10293+ SmallPtrSet<PHINode *, 2 > WidenedPhis;
10294+ for (VPRecipeBase &R :
10295+ BestEpiPlan.getVectorLoopRegion ()->getEntryBasicBlock ()->phis ()) {
10296+ if (!isa<VPWidenIntOrFpInductionRecipe,
10297+ VPWidenPointerInductionRecipe>(&R))
10298+ continue ;
10299+ if (isa<VPWidenIntOrFpInductionRecipe>(&R))
10300+ WidenedPhis.insert (
10301+ cast<VPWidenIntOrFpInductionRecipe>(&R)->getPHINode ());
10302+ else
10303+ WidenedPhis.insert (
10304+ cast<PHINode>(R.getVPSingleValue ()->getUnderlyingValue ()));
10305+ }
10306+ VPBasicBlock *MiddleVPBB = cast<VPBasicBlock>(
10307+ BestMainPlan->getVectorLoopRegion ()->getSingleSuccessor ());
10308+
10309+ VPBasicBlock *ScalarPHVPBB = nullptr ;
10310+ if (MiddleVPBB->getNumSuccessors () == 2 ) {
10311+ // Order is strict: first is the exit block, second is the scalar
10312+ // preheader.
10313+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors ()[1 ]);
10314+ } else {
10315+ ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor ());
10316+ }
10317+
10318+ for (VPRecipeBase &R :
10319+ *cast<VPIRBasicBlock>(ScalarPHVPBB->getSingleSuccessor ())) {
10320+ auto *VPIRInst = cast<VPIRInstruction>(&R);
10321+ auto *IRI = dyn_cast<PHINode>(&VPIRInst->getInstruction ());
10322+ if (!IRI)
10323+ break ;
10324+ if (WidenedPhis.contains (IRI) ||
10325+ !LVL.getInductionVars ().contains (IRI))
10326+ continue ;
10327+ VPRecipeBase *ResumePhi =
10328+ VPIRInst->getOperand (0 )->getDefiningRecipe ();
10329+ VPIRInst->setOperand (0 , BestMainPlan->getOrAddLiveIn (
10330+ Constant::getNullValue (IRI->getType ())));
10331+ ResumePhi->eraseFromParent ();
10332+ }
10333+ VPlanTransforms::removeDeadRecipes (*BestMainPlan);
10334+
1023410335 auto ExpandedSCEVs = LVP.executePlan (EPI.MainLoopVF , EPI.MainLoopUF ,
1023510336 *BestMainPlan, MainILV, DT, true );
1023610337 ++LoopsVectorized;
@@ -10239,7 +10340,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
1023910340 // edges from the first pass.
1024010341 EPI.MainLoopVF = EPI.EpilogueVF ;
1024110342 EPI.MainLoopUF = EPI.EpilogueUF ;
10242- VPlan &BestEpiPlan = LVP.getPlanFor (EPI.EpilogueVF );
1024310343 EpilogueVectorizerEpilogueLoop EpilogILV (L, PSE, LI, DT, TLI, TTI, AC,
1024410344 ORE, EPI, &LVL, &CM, BFI, PSI,
1024510345 Checks, BestEpiPlan);
0 commit comments