Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4838,6 +4838,10 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
//
DoPhase(this, PHASE_UNROLL_LOOPS, &Compiler::optUnrollLoops);

// Peel loops
//
DoPhase(this, PHASE_PEEL_LOOPS, &Compiler::optPeelLoops);

// Compute dominators and exceptional entry blocks
//
DoPhase(this, PHASE_COMPUTE_DOMINATORS, &Compiler::fgComputeDominators);
Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -6786,10 +6786,15 @@ class Compiler

PhaseStatus optCloneLoops();
void optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* context);

PhaseStatus optUnrollLoops(); // Unrolls loops (needs to have cost info)
bool optTryUnrollLoop(FlowGraphNaturalLoop* loop, bool* changedIR);
void optRedirectPrevUnrollIteration(FlowGraphNaturalLoop* loop, BasicBlock* prevTestBlock, BasicBlock* target);
void optReplaceScalarUsesWithConst(BasicBlock* block, unsigned lclNum, ssize_t cnsVal);

PhaseStatus optPeelLoops();
bool optPeelLoop(FlowGraphNaturalLoop* loop);

void optRemoveRedundantZeroInits();
PhaseStatus optIfConversion(); // If conversion

Expand Down Expand Up @@ -9966,6 +9971,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
STRESS_MODE(DBL_ALN) \
STRESS_MODE(LCL_FLDS) \
STRESS_MODE(UNROLL_LOOPS) \
STRESS_MODE(PEEL_LOOPS) \
STRESS_MODE(MAKE_CSE) \
STRESS_MODE(LEGACY_INLINE) \
STRESS_MODE(CLONE_EXPR) \
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/compmemkind.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ CompMemKindMacro(LoopOpt)
CompMemKindMacro(LoopClone)
CompMemKindMacro(LoopUnroll)
CompMemKindMacro(LoopHoist)
CompMemKindMacro(LoopPeel)
CompMemKindMacro(Unknown)
CompMemKindMacro(RangeCheck)
CompMemKindMacro(CopyProp)
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/compphases.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ CompPhaseNameMacro(PHASE_ZERO_INITS, "Redundant zero Inits",
CompPhaseNameMacro(PHASE_FIND_LOOPS, "Find loops", false, -1, false)
CompPhaseNameMacro(PHASE_CLONE_LOOPS, "Clone loops", false, -1, false)
CompPhaseNameMacro(PHASE_UNROLL_LOOPS, "Unroll loops", false, -1, false)
CompPhaseNameMacro(PHASE_PEEL_LOOPS, "Peel loops", false, -1, false)
CompPhaseNameMacro(PHASE_MORPH_MDARR, "Morph array ops", false, -1, false)
CompPhaseNameMacro(PHASE_HOIST_LOOP_CODE, "Hoist loop code", false, -1, false)
CompPhaseNameMacro(PHASE_MARK_LOCAL_VARS, "Mark local vars", false, -1, false)
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/jitconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,7 @@ CONFIG_STRING(JitOnlyOptimizeRange,
W("JitOnlyOptimizeRange")) // If set, all methods that do _not_ match are forced into MinOpts
CONFIG_STRING(JitEnablePhysicalPromotionRange, W("JitEnablePhysicalPromotionRange"))
CONFIG_STRING(JitEnableCrossBlockLocalAssertionPropRange, W("JitEnableCrossBlockLocalAssertionPropRange"))
CONFIG_STRING(JitEnableLoopPeelingRange, W("JitEnableLoopPeelingRange"))

CONFIG_INTEGER(JitDoSsa, W("JitDoSsa"), 1) // Perform Static Single Assignment (SSA) numbering on the variables
CONFIG_INTEGER(JitDoValueNumber, W("JitDoValueNumber"), 1) // Perform value numbering on method expressions
Expand Down Expand Up @@ -659,6 +660,8 @@ CONFIG_INTEGER(JitEnableHeadTailMerge, W("JitEnableHeadTailMerge"), 1)
// Enable physical promotion
CONFIG_INTEGER(JitEnablePhysicalPromotion, W("JitEnablePhysicalPromotion"), 1)

CONFIG_INTEGER(JitEnableLoopPeeling, W("JitEnableLoopPeeling"), 0)

// Enable cross-block local assertion prop
CONFIG_INTEGER(JitEnableCrossBlockLocalAssertionProp, W("JitEnableCrossBlockLocalAssertionProp"), 1)

Expand Down
128 changes: 128 additions & 0 deletions src/coreclr/jit/optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1794,6 +1794,134 @@ void Compiler::optReplaceScalarUsesWithConst(BasicBlock* block, unsigned lclNum,
}
}

//-----------------------------------------------------------------------------
// optPeelLoops: Phase entry point for loop peeling. Attempts to peel every
//   outermost loop (a loop without a parent) via optPeelLoop.
//
// Returns:
//   PhaseStatus::MODIFIED_NOTHING if there are no loops, the method is outside
//   the configured range (DEBUG only), or no loop was peeled; otherwise
//   PhaseStatus::MODIFIED_EVERYTHING.
//
// Notes:
//   When at least one loop was peeled, the DFS tree and the loop table are
//   rebuilt and the blocks are renumbered before returning.
//
PhaseStatus Compiler::optPeelLoops()
{
    if (m_loops->NumLoops() == 0)
    {
        return PhaseStatus::MODIFIED_NOTHING;
    }

    // NOTE(review): the opt-in gate below is commented out, so neither
    // JitEnableLoopPeeling nor STRESS_PEEL_LOOPS currently influences this
    // phase. Confirm whether that is intentional before merging.
    //
    // if ((JitConfig.JitEnableLoopPeeling() == 0) && !compStressCompile(STRESS_PEEL_LOOPS, 25))
    //{
    //    return PhaseStatus::MODIFIED_NOTHING;
    //}

#ifdef DEBUG
    // Restrict peeling to methods whose hash falls inside JitEnableLoopPeelingRange.
    static ConfigMethodRange s_range;
    s_range.EnsureInit(JitConfig.JitEnableLoopPeelingRange());

    if (!s_range.Contains(info.compMethodHash()))
    {
        return PhaseStatus::MODIFIED_NOTHING;
    }
#endif

    unsigned peeledCount = 0;
    for (FlowGraphNaturalLoop* const candidate : m_loops->InReversePostOrder())
    {
        // Only outermost loops are considered; nested loops are skipped.
        const bool isOutermost = (candidate->GetParent() == nullptr);
        if (isOutermost && optPeelLoop(candidate))
        {
            peeledCount++;
        }
    }

    JITDUMP("Peeled %u loops\n", peeledCount);
    if (peeledCount != 0)
    {
        // The flow graph changed: recompute the DFS tree and the loops found
        // from it, then renumber the blocks.
        fgInvalidateDfsTree();
        m_dfsTree = fgComputeDfs();
        m_loops   = FlowGraphNaturalLoops::Find(m_dfsTree);
        fgRenumberBlocks();

        return PhaseStatus::MODIFIED_EVERYTHING;
    }

    return PhaseStatus::MODIFIED_NOTHING;
}

//-----------------------------------------------------------------------------
// optPeelLoop: Peel a single iteration off the given loop by placing one
//   copy of the loop blocks ahead of the loop.
//
// Arguments:
//   loop - The loop to peel.
//
// Returns:
//   True if the loop was peeled and the flow graph was changed; false if the
//   loop cannot be duplicated or its preheader and header are in different
//   EH regions (in which case nothing is modified).
//
// Notes:
//   On success the flow is:
//     old preheader -> peeled copy -> new preheader -> original loop
//   The old preheader loses BBF_LOOP_PREHEADER; the new block carries it.
//
bool Compiler::optPeelLoop(FlowGraphNaturalLoop* loop)
{
    JITDUMP("Considering peeling ");
    DBEXEC(verbose, FlowGraphNaturalLoop::Dump(loop));

    BasicBlock* const oldPreheader = loop->EntryEdge(0)->getSourceBlock();
    BasicBlock* const header       = loop->GetHeader();

    INDEBUG(const char* reason);
    if (!loop->CanDuplicate(INDEBUG(&reason)))
    {
        JITDUMP(" Cannot peel: %s\n", reason);
        return false;
    }

    if (!BasicBlock::sameEHRegion(oldPreheader, header))
    {
        JITDUMP(" Cannot peel: preheader and header are in different EH regions\n");
        return false;
    }

    // Insert a fresh preheader for the original loop right after the old one.
    JITDUMP("Create new preheader block for fast loop\n");

    BasicBlock* const newPreheader =
        fgNewBBafter(BBJ_ALWAYS, oldPreheader, /*extendRegion*/ true, /*jumpDest*/ header);
    JITDUMP("Adding " FMT_BB " after " FMT_BB "\n", newPreheader->bbNum, oldPreheader->bbNum);
    newPreheader->inheritWeight(oldPreheader);
    newPreheader->SetFlags(BBF_LOOP_PREHEADER);

    if (newPreheader->JumpsToNext())
    {
        newPreheader->SetFlags(BBF_NONE_QUIRK);
    }

    fgAddRefPred(header, newPreheader);

    // The old preheader is expected to jump unconditionally to the header.
    assert(oldPreheader->KindIs(BBJ_ALWAYS));
    assert(oldPreheader->TargetIs(header));

    oldPreheader->RemoveFlags(BBF_LOOP_PREHEADER);

    // Copy the loop blocks so that they land after the old preheader but
    // ahead of the new preheader; this copy is the single peeled iteration.
    BasicBlock*     insertionPoint = oldPreheader;
    BlockToBlockMap blockMap(getAllocator(CMK_LoopPeel));

    // Scale the copy's weights by preheader weight / header weight, unless
    // the header is rarely run or its weight is (approximately) zero.
    weight_t peelScale = 1;
    if (!header->isRunRarely())
    {
        const weight_t headerWeight = header->getBBWeight(this);
        if (!fgProfileWeightsEqual(headerWeight, 0))
        {
            peelScale = oldPreheader->getBBWeight(this) / headerWeight;
        }
    }

    loop->Duplicate(&insertionPoint, &blockMap, peelScale, /* bottomNeedsRedirection */ true);

    BasicBlock* const peeledHeader = blockMap[header];

    // Point every backedge of the copy at the new preheader instead of the
    // copied header, so the copy no longer forms a loop.
    for (FlowEdge* const backEdge : loop->BackEdges())
    {
        BasicBlock* const peeledSource = blockMap[backEdge->getSourceBlock()];
        fgReplaceJumpTarget(peeledSource, newPreheader, peeledHeader);
    }

    // Last step: the old preheader now enters the peeled iteration rather
    // than the original loop header.
    fgReplaceJumpTarget(oldPreheader, peeledHeader, header);
    return true;
}

Compiler::OptInvertCountTreeInfoType Compiler::optInvertCountTreeInfo(GenTree* tree)
{
class CountTreeInfoVisitor : public GenTreeVisitor<CountTreeInfoVisitor>
Expand Down