From a04ad03df6c8ff49ede7cb6c939747ee70ce93d3 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Mon, 30 Dec 2024 18:29:36 -0500 Subject: [PATCH 1/8] Propagate profile data in head-tail merging --- src/coreclr/jit/compiler.cpp | 15 ++++++++++----- src/coreclr/jit/fgopt.cpp | 7 +++++++ 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 901c4039b5dff8..78682a4e0f00c2 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4711,11 +4711,6 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // DoPhase(this, PHASE_CLONE_FINALLY, &Compiler::fgCloneFinally); - // Drop back to just checking profile likelihoods. - // - activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE; - activePhaseChecks |= PhaseChecks::CHECK_LIKELIHOODS; - // Do some flow-related optimizations // if (opts.OptimizationEnabled()) @@ -4726,6 +4721,11 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl return fgHeadTailMerge(true); }); + // Drop back to just checking profile likelihoods. + // + activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE; + activePhaseChecks |= PhaseChecks::CHECK_LIKELIHOODS; + // Merge common throw blocks // DoPhase(this, PHASE_MERGE_THROWS, &Compiler::fgTailMergeThrows); @@ -4735,6 +4735,11 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl DoPhase(this, PHASE_EARLY_UPDATE_FLOW_GRAPH, &Compiler::fgUpdateFlowGraphPhase); } + // Drop back to just checking profile likelihoods. + // + activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE; + activePhaseChecks |= PhaseChecks::CHECK_LIKELIHOODS; + // Promote struct locals // DoPhase(this, PHASE_PROMOTE_STRUCTS, &Compiler::fgPromoteStructs); diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 834a53be776a61..8c619d2a596404 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -6600,6 +6600,13 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) FlowEdge* const newEdge = fgAddRefPred(crossJumpTarget, predBlock); predBlock->SetKindAndTargetEdge(BBJ_ALWAYS, newEdge); } + + // For tail merge we have a common successor of predBlock and + // crossJumpTarget, so the profile update can be done locally. + if (crossJumpTarget->hasProfileWeight()) + { + crossJumpTarget->setBBProfileWeight(crossJumpTarget->bbWeight + predBlock->bbWeight); + } } // We changed things From ee9de040bba9ede8fd1fbd9deb0998fc9c80b237 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Mon, 30 Dec 2024 19:28:05 -0500 Subject: [PATCH 2/8] Simplify throw merging --- src/coreclr/jit/fgehopt.cpp | 48 ++++++------------------------------- 1 file changed, 7 insertions(+), 41 deletions(-) diff --git a/src/coreclr/jit/fgehopt.cpp b/src/coreclr/jit/fgehopt.cpp index d619e501e3b56a..dce67ac1cf2e27 100644 --- a/src/coreclr/jit/fgehopt.cpp +++ b/src/coreclr/jit/fgehopt.cpp @@ -2329,7 +2329,6 @@ PhaseStatus Compiler::fgTailMergeThrows() // The second pass modifies flow so that predecessors of // non-canonical throw blocks now transfer control to the // appropriate canonical block. - unsigned numCandidates = 0; // First pass // @@ -2393,7 +2392,6 @@ PhaseStatus Compiler::fgTailMergeThrows() // Yes, this one can be optimized away... JITDUMP(" in " FMT_BB " can be dup'd to canonical " FMT_BB "\n", block->bbNum, canonicalBlock->bbNum); blockMap.Set(block, canonicalBlock); - numCandidates++; } else { @@ -2403,9 +2401,8 @@ PhaseStatus Compiler::fgTailMergeThrows() } } - assert(numCandidates <= optNoReturnCallCount); - // Bail if no candidates were found + const unsigned numCandidates = blockMap.GetCount(); if (numCandidates == 0) { JITDUMP("\n*************** no throws can be tail merged, sorry\n"); @@ -2417,56 +2414,25 @@ PhaseStatus Compiler::fgTailMergeThrows() // Second pass. // // We walk the map rather than the block list, to save a bit of time. - unsigned updateCount = 0; - for (BlockToBlockMap::Node* const iter : BlockToBlockMap::KeyValueIteration(&blockMap)) { BasicBlock* const nonCanonicalBlock = iter->GetKey(); BasicBlock* const canonicalBlock = iter->GetValue(); - FlowEdge* nextPredEdge = nullptr; - bool updated = false; // Walk pred list of the non canonical block, updating flow to target // the canonical block instead. for (BasicBlock* const predBlock : nonCanonicalBlock->PredBlocksEditing()) { - switch (predBlock->GetKind()) - { - case BBJ_ALWAYS: - case BBJ_COND: - case BBJ_SWITCH: - { - JITDUMP("*** " FMT_BB " now branching to " FMT_BB "\n", predBlock->bbNum, canonicalBlock->bbNum); - fgReplaceJumpTarget(predBlock, nonCanonicalBlock, canonicalBlock); - updated = true; - } - break; - - default: - // We don't expect other kinds of preds, and it is safe to ignore them - // as flow is still correct, just not as optimized as it could be. - break; - } + JITDUMP("*** " FMT_BB " now branching to " FMT_BB "\n", predBlock->bbNum, canonicalBlock->bbNum); + fgReplaceJumpTarget(predBlock, nonCanonicalBlock, canonicalBlock); } - - if (updated) - { - updateCount++; - } - } - - if (updateCount == 0) - { - return PhaseStatus::MODIFIED_NOTHING; } - // TODO: Update the count of noreturn call sites -- this feeds a heuristic in morph - // to determine if these noreturn calls should be tail called. - // - // Updating the count does not lead to better results, so deferring for now. + // Update the count of noreturn call sites // - JITDUMP("Made %u updates\n", updateCount); - assert(updateCount < optNoReturnCallCount); + JITDUMP("Made %u updates\n", numCandidates); + assert(numCandidates < optNoReturnCallCount); + optNoReturnCallCount -= numCandidates; // If we altered flow, reset fgModified. Given where we sit in the // phase list, flow-dependent side data hasn't been built yet, so From 8827743299b9440243bb7ef182dceb8c49f9f1cc Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Mon, 30 Dec 2024 19:45:51 -0500 Subject: [PATCH 3/8] Clean up other fgReplaceJumpTarget call sites --- src/coreclr/jit/fgbasic.cpp | 17 +---------------- src/coreclr/jit/optimizer.cpp | 21 +-------------------- src/coreclr/jit/redundantbranchopts.cpp | 2 +- 3 files changed, 3 insertions(+), 37 deletions(-) diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index 320ad81906c310..652a2d434380b1 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -5086,22 +5086,7 @@ BasicBlock* Compiler::fgRemoveBlock(BasicBlock* block, bool unreachable) for (BasicBlock* const predBlock : block->PredBlocksEditing()) { /* change all jumps/refs to the removed block */ - switch (predBlock->GetKind()) - { - default: - noway_assert(!"Unexpected bbKind in fgRemoveBlock()"); - break; - - case BBJ_COND: - case BBJ_CALLFINALLY: - case BBJ_CALLFINALLYRET: - case BBJ_ALWAYS: - case BBJ_EHCATCHRET: - case BBJ_SWITCH: - case BBJ_EHFINALLYRET: - fgReplaceJumpTarget(predBlock, block, succBlock); - break; - } + fgReplaceJumpTarget(predBlock, block, succBlock); } fgUnlinkBlockForRemoval(block); diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index c734c1c8a7a3a5..2686d216b0ab0a 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -2269,26 +2269,7 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) JITDUMP("Redirecting non-loop " FMT_BB " -> " FMT_BB " to " FMT_BB " -> " FMT_BB "\n", predBlock->bbNum, bTest->bbNum, predBlock->bbNum, bNewCond->bbNum); - switch (predBlock->GetKind()) - { - case BBJ_ALWAYS: - case BBJ_CALLFINALLY: - case BBJ_CALLFINALLYRET: - case BBJ_COND: - case BBJ_SWITCH: - case BBJ_EHFINALLYRET: - fgReplaceJumpTarget(predBlock, bTest, bNewCond); - break; - - case BBJ_EHCATCHRET: - case BBJ_EHFILTERRET: - // These block types should not need redirecting - break; - - default: - assert(!"Unexpected bbKind for predecessor block"); - break; - } + fgReplaceJumpTarget(predBlock, bTest, bNewCond); } // If we have profile data for all blocks and we know that we are cloning the diff --git a/src/coreclr/jit/redundantbranchopts.cpp b/src/coreclr/jit/redundantbranchopts.cpp index fa91d22e8346bc..e9a5e9e3e6e5c6 100644 --- a/src/coreclr/jit/redundantbranchopts.cpp +++ b/src/coreclr/jit/redundantbranchopts.cpp @@ -1709,7 +1709,7 @@ bool Compiler::optJumpThreadCore(JumpThreadInfo& jti) BasicBlock* const ambBlock = jti.m_ambiguousVNBlock; if ((ambBlock != nullptr) && jti.m_block->KindIs(BBJ_COND) && (jti.m_block->GetUniquePred(this) == ambBlock)) { - JITDUMP(FMT_BB " has just one remaining predcessor " FMT_BB "\n", jti.m_block->bbNum, ambBlock->bbNum); + JITDUMP(FMT_BB " has just one remaining predecessor " FMT_BB "\n", jti.m_block->bbNum, ambBlock->bbNum); Statement* const stmt = jti.m_block->lastStmt(); assert(stmt != nullptr); From 189b526f35794e4b90dff3465573ed7d26925663 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Mon, 30 Dec 2024 23:29:22 -0500 Subject: [PATCH 4/8] Move shared pred edge iterator logic to base class --- src/coreclr/jit/block.h | 157 +++++++++++++++++++--------------------- 1 file changed, 73 insertions(+), 84 deletions(-) diff --git a/src/coreclr/jit/block.h b/src/coreclr/jit/block.h index 03770815f0125a..5e41875e8a4be0 100644 --- a/src/coreclr/jit/block.h +++ b/src/coreclr/jit/block.h @@ -204,42 +204,60 @@ struct allMemoryKinds } }; +// Base class for forward iterators over the predecessor edge linked list. +// Subclasses decide what the iterator should yield (edge, source block, etc.). +// The pred list cannot be modified during iteration unless allowEdits is true. +// +template +class BasePredIterator +{ +private: + // When allowEdits=false, try to guard against the user of the iterator from modifying the predecessor list + // being traversed: cache the edge we think should be next, then check it when we actually do the `++` + // operation. This is a bit conservative, but attempts to protect against callers assuming too much about + // this iterator implementation. + // When allowEdits=true, m_next is always used to update m_pred, so changes to m_pred don't break the iterator. + FlowEdge* m_next; + +protected: + FlowEdge* m_pred; + +public: + BasePredIterator(FlowEdge* pred); + + virtual IteratorType* operator*() const = 0; + + BasePredIterator& operator++(); + + bool operator!=(const BasePredIterator& i) const + { + return m_pred != i.m_pred; + } +}; + // PredEdgeList: adapter class for forward iteration of the predecessor edge linked list using range-based `for`, // normally used via BasicBlock::PredEdges(), e.g.: // for (FlowEdge* const edge : block->PredEdges()) ... +// allowEdits controls whether the iterator should be resilient to changes to the predecessor list. // +template class PredEdgeList { FlowEdge* m_begin; // Forward iterator for the predecessor edges linked list. - // The caller can't make changes to the preds list when using this. // - class iterator + class PredEdgeIterator : public BasePredIterator { - FlowEdge* m_pred; - -#ifdef DEBUG - // Try to guard against the user of the iterator from making changes to the IR that would invalidate - // the iterator: cache the edge we think should be next, then check it when we actually do the `++` - // operation. This is a bit conservative, but attempts to protect against callers assuming too much about - // this iterator implementation. - FlowEdge* m_next; -#endif - public: - iterator(FlowEdge* pred); - - FlowEdge* operator*() const + PredEdgeIterator(FlowEdge* pred) + : BasePredIterator(pred) { - return m_pred; } - iterator& operator++(); - - bool operator!=(const iterator& i) const + FlowEdge* operator*() const override { - return m_pred != i.m_pred; + return this->m_pred; } }; @@ -249,14 +267,14 @@ class PredEdgeList { } - iterator begin() const + PredEdgeIterator begin() const { - return iterator(m_begin); + return PredEdgeIterator(m_begin); } - iterator end() const + PredEdgeIterator end() const { - return iterator(nullptr); + return PredEdgeIterator(nullptr); } }; @@ -271,30 +289,16 @@ class PredBlockList FlowEdge* m_begin; // Forward iterator for the predecessor edges linked list, yielding the predecessor block, not the edge. - // The caller can't make changes to the preds list when using this. // - class iterator + class PredBlockIterator : public BasePredIterator { - FlowEdge* m_pred; - - // When allowEdits=false, try to guard against the user of the iterator from modifying the predecessor list - // being traversed: cache the edge we think should be next, then check it when we actually do the `++` - // operation. This is a bit conservative, but attempts to protect against callers assuming too much about - // this iterator implementation. - // When allowEdits=true, m_next is always used to update m_pred, so changes to m_pred don't break the iterator. - FlowEdge* m_next; - public: - iterator(FlowEdge* pred); - - BasicBlock* operator*() const; - - iterator& operator++(); - - bool operator!=(const iterator& i) const + PredBlockIterator(FlowEdge* pred) + : BasePredIterator(pred) { - return m_pred != i.m_pred; } + + BasicBlock* operator*() const override; }; public: @@ -303,14 +307,14 @@ class PredBlockList { } - iterator begin() const + PredBlockIterator begin() const { - return iterator(m_begin); + return PredBlockIterator(m_begin); } - iterator end() const + PredBlockIterator end() const { - return iterator(nullptr); + return PredBlockIterator(nullptr); } }; @@ -1536,9 +1540,18 @@ struct BasicBlock : private LIR::Range // PredEdges: convenience method for enabling range-based `for` iteration over predecessor edges, e.g.: // for (FlowEdge* const edge : block->PredEdges()) ... // - PredEdgeList PredEdges() const + PredEdgeList PredEdges() const + { + return PredEdgeList(bbPreds); + } + + // PredEdgesEditing: convenience method for enabling range-based `for` iteration over predecessor edges, e.g.: + // for (FlowEdge* const edge : block->PredEdges()) ... + // This iterator tolerates modifications to bbPreds. + // + PredEdgeList PredEdgesEditing() const { - return PredEdgeList(bbPreds); + return PredEdgeList(bbPreds); } // PredBlocks: convenience method for enabling range-based `for` iteration over predecessor blocks, e.g.: @@ -2424,30 +2437,8 @@ inline BasicBlock* BBArrayIterator::operator*() const // Pred list iterator implementations (that are required to be defined after the declaration of BasicBlock and FlowEdge) -inline PredEdgeList::iterator::iterator(FlowEdge* pred) - : m_pred(pred) -{ -#ifdef DEBUG - m_next = (m_pred == nullptr) ? nullptr : m_pred->getNextPredEdge(); -#endif -} - -inline PredEdgeList::iterator& PredEdgeList::iterator::operator++() -{ - FlowEdge* next = m_pred->getNextPredEdge(); - -#ifdef DEBUG - // Check that the next block is the one we expect to see. - assert(next == m_next); - m_next = (next == nullptr) ? nullptr : next->getNextPredEdge(); -#endif // DEBUG - - m_pred = next; - return *this; -} - -template -inline PredBlockList::iterator::iterator(FlowEdge* pred) +template +inline BasePredIterator::BasePredIterator(FlowEdge* pred) : m_pred(pred) { bool initNextPointer = allowEdits; @@ -2458,14 +2449,8 @@ inline PredBlockList::iterator::iterator(FlowEdge* pred) } } -template -inline BasicBlock* PredBlockList::iterator::operator*() const -{ - return m_pred->getSourceBlock(); -} - -template -inline typename PredBlockList::iterator& PredBlockList::iterator::operator++() +template +inline BasePredIterator& BasePredIterator::operator++() { if (allowEdits) { @@ -2477,11 +2462,9 @@ inline typename PredBlockList::iterator& PredBlockList:: { FlowEdge* next = m_pred->getNextPredEdge(); -#ifdef DEBUG // If allowEdits=false, check that the next block is the one we expect to see. assert(next == m_next); - m_next = (m_next == nullptr) ? nullptr : m_next->getNextPredEdge(); -#endif // DEBUG + INDEBUG(m_next = (m_next == nullptr) ? nullptr : m_next->getNextPredEdge()); m_pred = next; } @@ -2489,6 +2472,12 @@ inline typename PredBlockList::iterator& PredBlockList:: return *this; } +template +inline BasicBlock* PredBlockList::PredBlockIterator::operator*() const +{ + return this->m_pred->getSourceBlock(); +} + /***************************************************************************** * * The following call-backs supplied by the client; it's used by the code From d00f85fb32c74035076458064bf3bff1674696ad Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Thu, 2 Jan 2025 15:57:36 -0500 Subject: [PATCH 5/8] Make operator* non-virtual --- src/coreclr/jit/block.h | 28 +++++++++++++--------------- 1 file changed, 13 insertions(+), 15 deletions(-) diff --git a/src/coreclr/jit/block.h b/src/coreclr/jit/block.h index 5e41875e8a4be0..23cf1f4fa44546 100644 --- a/src/coreclr/jit/block.h +++ b/src/coreclr/jit/block.h @@ -205,10 +205,10 @@ struct allMemoryKinds }; // Base class for forward iterators over the predecessor edge linked list. -// Subclasses decide what the iterator should yield (edge, source block, etc.). +// Subclasses decide what the iterator yields (edge, source block, etc.) by implementing the dereference operator. // The pred list cannot be modified during iteration unless allowEdits is true. // -template +template class BasePredIterator { private: @@ -222,11 +222,9 @@ class BasePredIterator protected: FlowEdge* m_pred; -public: BasePredIterator(FlowEdge* pred); - virtual IteratorType* operator*() const = 0; - +public: BasePredIterator& operator++(); bool operator!=(const BasePredIterator& i) const @@ -247,15 +245,15 @@ class PredEdgeList // Forward iterator for the predecessor edges linked list. // - class PredEdgeIterator : public BasePredIterator + class PredEdgeIterator : public BasePredIterator { public: PredEdgeIterator(FlowEdge* pred) - : BasePredIterator(pred) + : BasePredIterator(pred) { } - FlowEdge* operator*() const override + FlowEdge* operator*() const { return this->m_pred; } @@ -290,15 +288,15 @@ class PredBlockList // Forward iterator for the predecessor edges linked list, yielding the predecessor block, not the edge. // - class PredBlockIterator : public BasePredIterator + class PredBlockIterator : public BasePredIterator { public: PredBlockIterator(FlowEdge* pred) - : BasePredIterator(pred) + : BasePredIterator(pred) { } - BasicBlock* operator*() const override; + BasicBlock* operator*() const; }; public: @@ -2437,8 +2435,8 @@ inline BasicBlock* BBArrayIterator::operator*() const // Pred list iterator implementations (that are required to be defined after the declaration of BasicBlock and FlowEdge) -template -inline BasePredIterator::BasePredIterator(FlowEdge* pred) +template +inline BasePredIterator::BasePredIterator(FlowEdge* pred) : m_pred(pred) { bool initNextPointer = allowEdits; @@ -2449,8 +2447,8 @@ inline BasePredIterator::BasePredIterator(FlowEdge* pr } } -template -inline BasePredIterator& BasePredIterator::operator++() +template +inline BasePredIterator& BasePredIterator::operator++() { if (allowEdits) { From 24ca55cf7cb3ac28f0e47a667903df3d1427b5fd Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Thu, 2 Jan 2025 17:20:22 -0500 Subject: [PATCH 6/8] Undo loop inversion changes --- src/coreclr/jit/optimizer.cpp | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 2686d216b0ab0a..c734c1c8a7a3a5 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -2269,7 +2269,26 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) JITDUMP("Redirecting non-loop " FMT_BB " -> " FMT_BB " to " FMT_BB " -> " FMT_BB "\n", predBlock->bbNum, bTest->bbNum, predBlock->bbNum, bNewCond->bbNum); - fgReplaceJumpTarget(predBlock, bTest, bNewCond); + switch (predBlock->GetKind()) + { + case BBJ_ALWAYS: + case BBJ_CALLFINALLY: + case BBJ_CALLFINALLYRET: + case BBJ_COND: + case BBJ_SWITCH: + case BBJ_EHFINALLYRET: + fgReplaceJumpTarget(predBlock, bTest, bNewCond); + break; + + case BBJ_EHCATCHRET: + case BBJ_EHFILTERRET: + // These block types should not need redirecting + break; + + default: + assert(!"Unexpected bbKind for predecessor block"); + break; + } } // If we have profile data for all blocks and we know that we are cloning the From d6e9939c4becbebd40434d56aa42b38701203092 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Thu, 2 Jan 2025 17:48:37 -0500 Subject: [PATCH 7/8] Propagate profile data in throw merging --- src/coreclr/jit/compiler.cpp | 8 ++++---- src/coreclr/jit/fgehopt.cpp | 26 +++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 78682a4e0f00c2..7da99822082092 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4721,15 +4721,15 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl return fgHeadTailMerge(true); }); + // Merge common throw blocks + // + DoPhase(this, PHASE_MERGE_THROWS, &Compiler::fgTailMergeThrows); + // Drop back to just checking profile likelihoods. // activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE; activePhaseChecks |= PhaseChecks::CHECK_LIKELIHOODS; - // Merge common throw blocks - // - DoPhase(this, PHASE_MERGE_THROWS, &Compiler::fgTailMergeThrows); - // Run an early flow graph simplification pass // DoPhase(this, PHASE_EARLY_UPDATE_FLOW_GRAPH, &Compiler::fgUpdateFlowGraphPhase); diff --git a/src/coreclr/jit/fgehopt.cpp b/src/coreclr/jit/fgehopt.cpp index dce67ac1cf2e27..eba203153622f2 100644 --- a/src/coreclr/jit/fgehopt.cpp +++ b/src/coreclr/jit/fgehopt.cpp @@ -2410,6 +2410,7 @@ PhaseStatus Compiler::fgTailMergeThrows() } JITDUMP("\n*** found %d merge candidates, rewriting flow\n\n", numCandidates); + bool modifiedProfile = false; // Second pass. // @@ -2418,14 +2419,37 @@ PhaseStatus Compiler::fgTailMergeThrows() { BasicBlock* const nonCanonicalBlock = iter->GetKey(); BasicBlock* const canonicalBlock = iter->GetValue(); + weight_t removedWeight = BB_ZERO_WEIGHT; // Walk pred list of the non canonical block, updating flow to target // the canonical block instead. - for (BasicBlock* const predBlock : nonCanonicalBlock->PredBlocksEditing()) + for (FlowEdge* const predEdge : nonCanonicalBlock->PredEdgesEditing()) { + removedWeight += predEdge->getLikelyWeight(); + BasicBlock* const predBlock = predEdge->getSourceBlock(); JITDUMP("*** " FMT_BB " now branching to " FMT_BB "\n", predBlock->bbNum, canonicalBlock->bbNum); fgReplaceJumpTarget(predBlock, nonCanonicalBlock, canonicalBlock); } + + if (canonicalBlock->hasProfileWeight()) + { + canonicalBlock->setBBProfileWeight(canonicalBlock->bbWeight + removedWeight); + modifiedProfile = true; + + // Don't bother updating flow into nonCanonicalBlock, since it will become unreachable + } + } + + // In practice, when we have true profile data, we can repair it locally above, since the no-return + // calls mean that there is no contribution from the throw blocks to any of their successors. + // However, these blocks won't be morphed into BBJ_THROW blocks until later, + // so mark profile data as inconsistent for now. + if (modifiedProfile) + { + JITDUMP( + "fgTailMergeThrows: Modified flow into no-return blocks that still have successors. Data %s inconsistent.\n", + fgPgoConsistent ? "is now" : "was already"); + fgPgoConsistent = false; } // Update the count of noreturn call sites From 53f5a946a8d63c46fd4b5898d7a5fd37830469a5 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Thu, 2 Jan 2025 20:57:31 -0500 Subject: [PATCH 8/8] Maintain profile through flowgraph simplification --- src/coreclr/jit/compiler.cpp | 15 +++------ src/coreclr/jit/fgopt.cpp | 53 ++++++++++++-------------------- src/coreclr/jit/fgprofile.cpp | 3 +- src/coreclr/jit/morph.cpp | 58 ++++++----------------------------- 4 files changed, 36 insertions(+), 93 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 7da99822082092..6d4808e71cb381 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4725,21 +4725,11 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // DoPhase(this, PHASE_MERGE_THROWS, &Compiler::fgTailMergeThrows); - // Drop back to just checking profile likelihoods. - // - activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE; - activePhaseChecks |= PhaseChecks::CHECK_LIKELIHOODS; - // Run an early flow graph simplification pass // DoPhase(this, PHASE_EARLY_UPDATE_FLOW_GRAPH, &Compiler::fgUpdateFlowGraphPhase); } - // Drop back to just checking profile likelihoods. - // - activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE; - activePhaseChecks |= PhaseChecks::CHECK_LIKELIHOODS; - // Promote struct locals // DoPhase(this, PHASE_PROMOTE_STRUCTS, &Compiler::fgPromoteStructs); @@ -4789,6 +4779,11 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // DoPhase(this, PHASE_MORPH_IMPBYREF, &Compiler::fgRetypeImplicitByRefArgs); + // Drop back to just checking profile likelihoods. + // + activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE; + activePhaseChecks |= PhaseChecks::CHECK_LIKELIHOODS; + #ifdef DEBUG // Now that locals have address-taken and implicit byref marked, we can safely apply stress. lvaStressLclFld(); diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 8c619d2a596404..86c8d8df76d41a 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -1317,30 +1317,7 @@ bool Compiler::fgOptimizeBranchToEmptyUnconditional(BasicBlock* block, BasicBloc } #endif // DEBUG - // - // When we optimize a branch to branch we need to update the profile weight - // of bDest by subtracting out the weight of the path that is being optimized. - // - if (bDest->hasProfileWeight()) - { - FlowEdge* const edge = fgGetPredForBlock(bDest, block); - noway_assert(edge != nullptr); - - const weight_t edgeWeight = edge->getLikelyWeight(); - - // - // Update the bDest->bbWeight - // - if (bDest->bbWeight > edgeWeight) - { - bDest->bbWeight -= edgeWeight; - } - else - { - bDest->bbWeight = BB_ZERO_WEIGHT; - bDest->SetFlags(BBF_RUN_RARELY); // Set the RarelyRun flag - } - } + weight_t removedWeight; // Optimize the JUMP to empty unconditional JUMP to go to the new target switch (block->GetKind()) @@ -1348,6 +1325,7 @@ bool Compiler::fgOptimizeBranchToEmptyUnconditional(BasicBlock* block, BasicBloc case BBJ_ALWAYS: case BBJ_CALLFINALLYRET: { + removedWeight = block->bbWeight; fgRedirectTargetEdge(block, bDest->GetTarget()); break; } @@ -1356,11 +1334,13 @@ bool Compiler::fgOptimizeBranchToEmptyUnconditional(BasicBlock* block, BasicBloc if (block->TrueTargetIs(bDest)) { assert(!block->FalseTargetIs(bDest)); + removedWeight = block->GetTrueEdge()->getLikelyWeight(); fgRedirectTrueEdge(block, bDest->GetTarget()); } else { assert(block->FalseTargetIs(bDest)); + removedWeight = block->GetFalseEdge()->getLikelyWeight(); fgRedirectFalseEdge(block, bDest->GetTarget()); } break; @@ -1369,6 +1349,15 @@ bool Compiler::fgOptimizeBranchToEmptyUnconditional(BasicBlock* block, BasicBloc unreached(); } + // + // When we optimize a branch to branch we need to update the profile weight + // of bDest by subtracting out the weight of the path that is being optimized. + // + if (bDest->hasProfileWeight()) + { + bDest->setBBProfileWeight(max(0.0, bDest->bbWeight - removedWeight)); + } + return true; } return false; @@ -1628,17 +1617,10 @@ bool Compiler::fgOptimizeSwitchBranches(BasicBlock* block) // FlowEdge* const oldEdge = *jmpTab; - if (fgIsUsingProfileWeights() && bDest->hasProfileWeight()) + if (bDest->hasProfileWeight()) { weight_t const branchThroughWeight = oldEdge->getLikelyWeight(); - if (bDest->bbWeight > branchThroughWeight) - { - bDest->bbWeight -= branchThroughWeight; - } - else - { - bDest->bbSetRunRarely(); - } + bDest->setBBProfileWeight(max(0.0, bDest->bbWeight - branchThroughWeight)); } // Update the switch jump table @@ -1676,6 +1658,11 @@ bool Compiler::fgOptimizeSwitchBranches(BasicBlock* block) { // Invalidate the set of unique targets for block, since we modified the targets fgInvalidateSwitchDescMapEntry(block); + + JITDUMP( + "fgOptimizeSwitchBranches: Optimized switch flow. Profile needs to be re-propagated. Data %s consistent.\n", + fgPgoConsistent ? "is now" : "was already"); + fgPgoConsistent = false; } Statement* switchStmt = nullptr; diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp index 0b8f4378c48270..6971e6304031cc 100644 --- a/src/coreclr/jit/fgprofile.cpp +++ b/src/coreclr/jit/fgprofile.cpp @@ -5134,7 +5134,8 @@ void Compiler::fgRepairProfileCondToUncond(BasicBlock* block, // If profile weights are consistent, expect at worst a slight underflow. // - if (fgPgoConsistent && (alternateNewWeight < 0.0)) + const bool checkProfileConsistency = hasFlag(activePhaseChecks, PhaseChecks::CHECK_PROFILE); + if (checkProfileConsistency && fgPgoConsistent && (alternateNewWeight < 0.0)) { assert(fgProfileWeightsEqual(alternateNewWeight, 0.0)); } diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 7f6cd2cb3af97e..8f42dd8743ce4e 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -12658,63 +12658,23 @@ Compiler::FoldResult Compiler::fgFoldConditional(BasicBlock* block) fgRemoveStmt(block, lastStmt); result = FoldResult::FOLD_REMOVED_LAST_STMT; } - // block is a BBJ_COND that we are folding the conditional for. - // bTaken is the path that will always be taken from block. - // bNotTaken is the path that will never be taken from block. - // - BasicBlock* bTaken; - BasicBlock* bNotTaken; - FlowEdge* edgeTaken; + + FlowEdge *retainedEdge, *removedEdge; if (cond->AsIntCon()->gtIconVal != 0) { - // JTRUE 1 - transform the basic block into a BBJ_ALWAYS - bTaken = block->GetTrueTarget(); - bNotTaken = block->GetFalseTarget(); - - // Remove 'block' from the predecessor list of 'bNotTaken' */ - fgRemoveRefPred(block->GetFalseEdge()); - - edgeTaken = block->GetTrueEdge(); - block->SetKindAndTargetEdge(BBJ_ALWAYS, edgeTaken); + retainedEdge = block->GetTrueEdge(); + removedEdge = block->GetFalseEdge(); } else { - // JTRUE 0 - transform the basic block into a BBJ_ALWAYS - bTaken = block->GetFalseTarget(); - bNotTaken = block->GetTrueTarget(); - - // Remove 'block' from the predecessor list of 'bNotTaken' */ - fgRemoveRefPred(block->GetTrueEdge()); - - edgeTaken = block->GetFalseEdge(); - block->SetKindAndTargetEdge(BBJ_ALWAYS, block->GetFalseEdge()); + retainedEdge = block->GetFalseEdge(); + removedEdge = block->GetTrueEdge(); } - // We examine the taken edge (block -> bTaken) - // if block has valid profile weight and bTaken does not we try to adjust bTaken's weight - // else if bTaken has valid profile weight and block does not we try to adjust block's weight - // We can only adjust the block weights when (the edge block -> bTaken) is the only edge into bTaken - // - if (block->hasProfileWeight()) - { - if (!bTaken->hasProfileWeight()) - { - if ((bTaken->countOfInEdges() == 1) || (bTaken->bbWeight < block->bbWeight)) - { - // Update the weight of bTaken - bTaken->inheritWeight(block); - } - } - } - else if (bTaken->hasProfileWeight()) - { - if (bTaken->countOfInEdges() == 1) - { - // Update the weight of block - block->inheritWeight(bTaken); - } - } + fgRemoveRefPred(removedEdge); + block->SetKindAndTargetEdge(BBJ_ALWAYS, retainedEdge); + fgRepairProfileCondToUncond(block, retainedEdge, removedEdge); #ifdef DEBUG if (verbose)