From 32e773559e74a95298a01742ef45431e5bb1efc7 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Wed, 8 Jan 2025 14:59:32 -0500 Subject: [PATCH 01/11] Use block weight helpers in more places --- src/coreclr/jit/fgehopt.cpp | 2 +- src/coreclr/jit/fgopt.cpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/fgehopt.cpp b/src/coreclr/jit/fgehopt.cpp index 0d620fd1821a97..80e6f184254dbf 100644 --- a/src/coreclr/jit/fgehopt.cpp +++ b/src/coreclr/jit/fgehopt.cpp @@ -2433,7 +2433,7 @@ PhaseStatus Compiler::fgTailMergeThrows() if (canonicalBlock->hasProfileWeight()) { - canonicalBlock->setBBProfileWeight(canonicalBlock->bbWeight + removedWeight); + canonicalBlock->increaseBBProfileWeight(removedWeight); modifiedProfile = true; // Don't bother updating flow into nonCanonicalBlock, since it is now unreachable diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 10d381c0caeaa8..0edee0df5e2cbc 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -1355,7 +1355,7 @@ bool Compiler::fgOptimizeBranchToEmptyUnconditional(BasicBlock* block, BasicBloc // if (bDest->hasProfileWeight()) { - bDest->setBBProfileWeight(max(0.0, bDest->bbWeight - removedWeight)); + bDest->decreaseBBProfileWeight(removedWeight); } return true; @@ -1620,7 +1620,7 @@ bool Compiler::fgOptimizeSwitchBranches(BasicBlock* block) if (bDest->hasProfileWeight()) { weight_t const branchThroughWeight = oldEdge->getLikelyWeight(); - bDest->setBBProfileWeight(max(0.0, bDest->bbWeight - branchThroughWeight)); + bDest->decreaseBBProfileWeight(branchThroughWeight); } // Update the switch jump table @@ -6624,7 +6624,7 @@ PhaseStatus Compiler::fgHeadTailMerge(bool early) // crossJumpTarget, so the profile update can be done locally. if (crossJumpTarget->hasProfileWeight()) { - crossJumpTarget->setBBProfileWeight(crossJumpTarget->bbWeight + predBlock->bbWeight); + crossJumpTarget->increaseBBProfileWeight(predBlock->bbWeight); } } From 57b7705289442c7cdef9b4b0846d2b18491ef03b Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Wed, 8 Jan 2025 19:01:00 -0500 Subject: [PATCH 02/11] Move profile checks to after morph --- src/coreclr/jit/compiler.cpp | 11 ++-- src/coreclr/jit/fgbasic.cpp | 22 +++++-- src/coreclr/jit/morph.cpp | 121 ++++++++++++++++------------------- 3 files changed, 78 insertions(+), 76 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 52975e90bf8854..038a77e6cc2733 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4779,11 +4779,6 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // DoPhase(this, PHASE_MORPH_IMPBYREF, &Compiler::fgRetypeImplicitByRefArgs); - // Drop back to just checking profile likelihoods. - // - activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE; - activePhaseChecks |= PhaseChecks::CHECK_LIKELIHOODS; - #ifdef DEBUG // Now that locals have address-taken and implicit byref marked, we can safely apply stress. lvaStressLclFld(); @@ -4813,6 +4808,12 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // Enable IR checks activePhaseChecks |= PhaseChecks::CHECK_IR; }; + + // Drop back to just checking profile likelihoods. + // + activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE; + activePhaseChecks |= PhaseChecks::CHECK_LIKELIHOODS; + DoPhase(this, PHASE_POST_MORPH, postMorphPhase); // GS security checks for unsafe buffers diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index 652a2d434380b1..b73ef631968b1b 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -156,14 +156,28 @@ void Compiler::fgConvertBBToThrowBB(BasicBlock* block) } // Scrub this block from the pred lists of any successors - fgRemoveBlockAsPred(block); + bool profileInconsistent = false; + + for (BasicBlock* const succBlock : block->Succs(this)) + { + FlowEdge* const succEdge = fgRemoveAllRefPreds(succBlock, block); + + if (block->hasProfileWeight() && succBlock->hasProfileWeight()) + { + succBlock->decreaseBBProfileWeight(succEdge->getLikelyWeight()); + profileInconsistent |= (succBlock->NumSucc() > 0); + } + } + + if (profileInconsistent) + { + JITDUMP("Flow removal of " FMT_BB " needs to be propagated. Data %s inconsistent.\n", block->bbNum, fgPgoConsistent ? "is now" : "was already"); + fgPgoConsistent = false; + } // Update jump kind after the scrub. block->SetKindAndTargetEdge(BBJ_THROW); block->RemoveFlags(BBF_RETLESS_CALL); // no longer a BBJ_CALLFINALLY - - // Any block with a throw is rare - block->bbSetRunRarely(); } /***************************************************************************** diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 8f42dd8743ce4e..ef47f8647bcb42 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -5366,55 +5366,26 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call) // if (compCurBB->KindIs(BBJ_ALWAYS)) { + BasicBlock* const curBlock = compCurBB; + BasicBlock* const targetBlock = curBlock->GetTarget(); + // Flow no longer reaches the target from here. // - fgRemoveRefPred(compCurBB->GetTargetEdge()); + fgRemoveRefPred(curBlock->GetTargetEdge()); - // Adjust profile weights of the successor blocks. + // Adjust profile weights of the successor block. // // Note if this is a tail call to loop, further updates // are needed once we install the loop edge. // - BasicBlock* curBlock = compCurBB; - if (curBlock->hasProfileWeight()) + if (curBlock->hasProfileWeight() && targetBlock->hasProfileWeight()) { - weight_t weightLoss = curBlock->bbWeight; - BasicBlock* nextBlock = curBlock->GetTarget(); + targetBlock->decreaseBBProfileWeight(curBlock->bbWeight); - while (nextBlock->hasProfileWeight()) + if (targetBlock->NumSucc() > 0) { - // Since we have linear flow we can update the next block weight. - // - weight_t const nextWeight = nextBlock->bbWeight; - weight_t const newNextWeight = nextWeight - weightLoss; - - // If the math would result in a negative weight then there's - // no local repair we can do; just leave things inconsistent. - // - if (newNextWeight >= 0) - { - // Note if we'd already morphed the IR in nextblock we might - // have done something profile sensitive that we should arguably reconsider. - // - JITDUMP("Reducing profile weight of " FMT_BB " from " FMT_WT " to " FMT_WT "\n", nextBlock->bbNum, - nextWeight, newNextWeight); - - nextBlock->setBBProfileWeight(newNextWeight); - } - else - { - JITDUMP("Not reducing profile weight of " FMT_BB " as its weight " FMT_WT - " is less than direct flow pred " FMT_BB " weight " FMT_WT "\n", - nextBlock->bbNum, nextWeight, compCurBB->bbNum, weightLoss); - } - - if (!nextBlock->KindIs(BBJ_ALWAYS)) - { - break; - } - - curBlock = nextBlock; - nextBlock = curBlock->GetTarget(); + JITDUMP("Flow removal out of " FMT_BB " needs to be propagated. Data %s inconsistent.\n", curBlock->bbNum, fgPgoConsistent ? "is now" : "was already"); + fgPgoConsistent = false; } } } @@ -6755,12 +6726,12 @@ void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCa fgRemoveStmt(block, lastStmt); // Set the loop edge. + BasicBlock* entryBB; if (opts.IsOSR()) { // Todo: this may not look like a viable loop header. // Might need the moral equivalent of an init BB. - FlowEdge* const newEdge = fgAddRefPred(fgEntryBB, block); - block->SetKindAndTargetEdge(BBJ_ALWAYS, newEdge); + entryBB = fgEntryBB; } else { @@ -6769,9 +6740,18 @@ void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCa // TODO-Cleanup: We should really be expanding tailcalls into loops // much earlier than this, at a place where we do not need to have // hacky workarounds to figure out what the actual IL entry block is. - BasicBlock* firstILBB = fgGetFirstILBlock(); - FlowEdge* const newEdge = fgAddRefPred(firstILBB, block); - block->SetKindAndTargetEdge(BBJ_ALWAYS, newEdge); + entryBB = fgGetFirstILBlock(); + } + + FlowEdge* const newEdge = fgAddRefPred(entryBB, block); + block->SetKindAndTargetEdge(BBJ_ALWAYS, newEdge); + + // Update profile + if (block->hasProfileWeight() && entryBB->hasProfileWeight()) + { + entryBB->increaseBBProfileWeight(block->bbWeight); + JITDUMP("Flow into entry BB " FMT_BB " increased. Data %s inconsistent.\n", entryBB->bbNum, fgPgoConsistent ? "is now" : "was already"); + fgPgoConsistent = false; } // Finish hooking things up. @@ -12745,6 +12725,7 @@ Compiler::FoldResult Compiler::fgFoldConditional(BasicBlock* block) unsigned jumpCnt = block->GetSwitchTargets()->bbsCount; FlowEdge** jumpTab = block->GetSwitchTargets()->bbsDstTab; bool foundVal = false; + bool profileInconsistent = false; for (unsigned val = 0; val < jumpCnt; val++, jumpTab++) { @@ -12752,6 +12733,13 @@ Compiler::FoldResult Compiler::fgFoldConditional(BasicBlock* block) assert(curEdge->getDestinationBlock()->countOfInEdges() > 0); + BasicBlock* const targetBlock = curEdge->getDestinationBlock(); + if (block->hasProfileWeight() && targetBlock->hasProfileWeight()) + { + targetBlock->decreaseBBProfileWeight(curEdge->getLikelyWeight()); + profileInconsistent |= (targetBlock->NumSucc() > 0); + } + // If val matches switchVal or we are at the last entry and // we never found the switch value then set the new jump dest @@ -12759,6 +12747,12 @@ Compiler::FoldResult Compiler::fgFoldConditional(BasicBlock* block) { block->SetKindAndTargetEdge(BBJ_ALWAYS, curEdge); foundVal = true; + + if (block->hasProfileWeight() && targetBlock->hasProfileWeight()) + { + targetBlock->increaseBBProfileWeight(block->bbWeight); + profileInconsistent |= (targetBlock->NumSucc() > 0); + } } else { @@ -12767,6 +12761,12 @@ Compiler::FoldResult Compiler::fgFoldConditional(BasicBlock* block) } } + if (profileInconsistent) + { + JITDUMP("Flow change out of " FMT_BB " needs to be propagated. Data %s inconsistent.\n", block->bbNum, fgPgoConsistent ? "is now" : "was already"); + fgPgoConsistent = false; + } + assert(foundVal); #ifdef DEBUG if (verbose) @@ -14263,6 +14263,14 @@ bool Compiler::fgExpandQmarkStmt(BasicBlock* block, Statement* stmt) BasicBlock* condBlock = fgNewBBafter(BBJ_ALWAYS, block, true); BasicBlock* elseBlock = fgNewBBafter(BBJ_ALWAYS, condBlock, true); + // Update flowgraph + fgRedirectTargetEdge(block, condBlock); + condBlock->SetTargetEdge(fgAddRefPred(elseBlock, condBlock)); + elseBlock->SetTargetEdge(fgAddRefPred(remainderBlock, elseBlock)); + + // Propagate flow from block into condBlock + condBlock->inheritWeight(block); + // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter). // If they're not internal, mark them as imported to avoid asserts about un-imported blocks. if (!block->HasFlag(BBF_INTERNAL)) @@ -14276,27 +14284,6 @@ bool Compiler::fgExpandQmarkStmt(BasicBlock* block, Statement* stmt) block->RemoveFlags(BBF_NEEDS_GCPOLL); remainderBlock->SetFlags(propagateFlagsToRemainder | propagateFlagsToAll); - condBlock->inheritWeight(block); - - // Make sure remainderBlock gets exactly the same weight as block after split - assert(condBlock->bbWeight == remainderBlock->bbWeight); - - assert(block->KindIs(BBJ_ALWAYS)); - fgRedirectTargetEdge(block, condBlock); - - { - FlowEdge* const newEdge = fgAddRefPred(elseBlock, condBlock); - condBlock->SetTargetEdge(newEdge); - } - - { - FlowEdge* const newEdge = fgAddRefPred(remainderBlock, elseBlock); - elseBlock->SetTargetEdge(newEdge); - } - - assert(condBlock->JumpsToNext()); - assert(elseBlock->JumpsToNext()); - condBlock->SetFlags(propagateFlagsToAll); elseBlock->SetFlags(propagateFlagsToAll); @@ -14324,8 +14311,7 @@ bool Compiler::fgExpandQmarkStmt(BasicBlock* block, Statement* stmt) const unsigned thenLikelihood = qmark->ThenNodeLikelihood(); const unsigned elseLikelihood = qmark->ElseNodeLikelihood(); - FlowEdge* const newEdge = fgAddRefPred(remainderBlock, thenBlock); - thenBlock->SetTargetEdge(newEdge); + thenBlock->SetTargetEdge(fgAddRefPred(remainderBlock, thenBlock)); assert(condBlock->TargetIs(elseBlock)); FlowEdge* const elseEdge = fgAddRefPred(thenBlock, condBlock); @@ -14344,6 +14330,7 @@ bool Compiler::fgExpandQmarkStmt(BasicBlock* block, Statement* stmt) // +-->-------------+ // bbj_cond(true) // + // TODO: Remove unnecessary condition reversal gtReverseCond(condExpr); const unsigned thenLikelihood = qmark->ThenNodeLikelihood(); From 3f5f514ac22149d5e13cf5e8691ea07a608055fb Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Thu, 9 Jan 2025 13:21:28 -0500 Subject: [PATCH 03/11] Move profile checks to after post-morph --- src/coreclr/jit/compiler.cpp | 10 +++++----- src/coreclr/jit/fgbasic.cpp | 3 ++- src/coreclr/jit/morph.cpp | 29 +++++++++++++++++------------ 3 files changed, 24 insertions(+), 18 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 038a77e6cc2733..abdc0cad578ac0 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4809,17 +4809,17 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl activePhaseChecks |= PhaseChecks::CHECK_IR; }; - // Drop back to just checking profile likelihoods. - // - activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE; - activePhaseChecks |= PhaseChecks::CHECK_LIKELIHOODS; - DoPhase(this, PHASE_POST_MORPH, postMorphPhase); // GS security checks for unsafe buffers // DoPhase(this, PHASE_GS_COOKIE, &Compiler::gsPhase); + // Drop back to just checking profile likelihoods. + // + activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE; + activePhaseChecks |= PhaseChecks::CHECK_LIKELIHOODS; + if (opts.OptimizationEnabled()) { // Compute the block weights diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index b73ef631968b1b..a34e2622157ba3 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -171,7 +171,8 @@ void Compiler::fgConvertBBToThrowBB(BasicBlock* block) if (profileInconsistent) { - JITDUMP("Flow removal of " FMT_BB " needs to be propagated. Data %s inconsistent.\n", block->bbNum, fgPgoConsistent ? "is now" : "was already"); + JITDUMP("Flow removal of " FMT_BB " needs to be propagated. Data %s inconsistent.\n", block->bbNum, + fgPgoConsistent ? "is now" : "was already"); fgPgoConsistent = false; } diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index ef47f8647bcb42..4c08c903c97ad1 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -5366,7 +5366,7 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call) // if (compCurBB->KindIs(BBJ_ALWAYS)) { - BasicBlock* const curBlock = compCurBB; + BasicBlock* const curBlock = compCurBB; BasicBlock* const targetBlock = curBlock->GetTarget(); // Flow no longer reaches the target from here. @@ -5384,7 +5384,8 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call) if (targetBlock->NumSucc() > 0) { - JITDUMP("Flow removal out of " FMT_BB " needs to be propagated. Data %s inconsistent.\n", curBlock->bbNum, fgPgoConsistent ? "is now" : "was already"); + JITDUMP("Flow removal out of " FMT_BB " needs to be propagated. Data %s inconsistent.\n", + curBlock->bbNum, fgPgoConsistent ? "is now" : "was already"); fgPgoConsistent = false; } } @@ -6750,7 +6751,8 @@ void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCa if (block->hasProfileWeight() && entryBB->hasProfileWeight()) { entryBB->increaseBBProfileWeight(block->bbWeight); - JITDUMP("Flow into entry BB " FMT_BB " increased. Data %s inconsistent.\n", entryBB->bbNum, fgPgoConsistent ? "is now" : "was already"); + JITDUMP("Flow into entry BB " FMT_BB " increased. Data %s inconsistent.\n", entryBB->bbNum, + fgPgoConsistent ? "is now" : "was already"); fgPgoConsistent = false; } @@ -12721,10 +12723,10 @@ Compiler::FoldResult Compiler::fgFoldConditional(BasicBlock* block) // modify the flow graph // Find the actual jump target - size_t switchVal = (size_t)cond->AsIntCon()->gtIconVal; - unsigned jumpCnt = block->GetSwitchTargets()->bbsCount; - FlowEdge** jumpTab = block->GetSwitchTargets()->bbsDstTab; - bool foundVal = false; + size_t switchVal = (size_t)cond->AsIntCon()->gtIconVal; + unsigned jumpCnt = block->GetSwitchTargets()->bbsCount; + FlowEdge** jumpTab = block->GetSwitchTargets()->bbsDstTab; + bool foundVal = false; bool profileInconsistent = false; for (unsigned val = 0; val < jumpCnt; val++, jumpTab++) @@ -12763,7 +12765,8 @@ Compiler::FoldResult Compiler::fgFoldConditional(BasicBlock* block) if (profileInconsistent) { - JITDUMP("Flow change out of " FMT_BB " needs to be propagated. Data %s inconsistent.\n", block->bbNum, fgPgoConsistent ? "is now" : "was already"); + JITDUMP("Flow change out of " FMT_BB " needs to be propagated. Data %s inconsistent.\n", block->bbNum, + fgPgoConsistent ? "is now" : "was already"); fgPgoConsistent = false; } @@ -14268,7 +14271,8 @@ bool Compiler::fgExpandQmarkStmt(BasicBlock* block, Statement* stmt) condBlock->SetTargetEdge(fgAddRefPred(elseBlock, condBlock)); elseBlock->SetTargetEdge(fgAddRefPred(remainderBlock, elseBlock)); - // Propagate flow from block into condBlock + // Propagate flow from block into condBlock. + // Leave flow out of remainderBlock intact, as it will post-dominate block. condBlock->inheritWeight(block); // These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter). @@ -14298,6 +14302,7 @@ bool Compiler::fgExpandQmarkStmt(BasicBlock* block, Statement* stmt) // +--->--------+ // bbj_cond(true) // + // TODO: Remove unnecessary condition reversal gtReverseCond(condExpr); thenBlock = fgNewBBafter(BBJ_ALWAYS, condBlock, true); @@ -14314,9 +14319,9 @@ bool Compiler::fgExpandQmarkStmt(BasicBlock* block, Statement* stmt) thenBlock->SetTargetEdge(fgAddRefPred(remainderBlock, thenBlock)); assert(condBlock->TargetIs(elseBlock)); - FlowEdge* const elseEdge = fgAddRefPred(thenBlock, condBlock); - FlowEdge* const thenEdge = condBlock->GetTargetEdge(); - condBlock->SetCond(thenEdge, elseEdge); + FlowEdge* const thenEdge = fgAddRefPred(thenBlock, condBlock); + FlowEdge* const elseEdge = condBlock->GetTargetEdge(); + condBlock->SetCond(elseEdge, thenEdge); thenBlock->inheritWeightPercentage(condBlock, thenLikelihood); elseBlock->inheritWeightPercentage(condBlock, elseLikelihood); thenEdge->setLikelihood(thenLikelihood / 100.0); From 3e89ceffd5ac077dde4381c3556d7f4e64ce542b Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Thu, 9 Jan 2025 15:45:36 -0500 Subject: [PATCH 04/11] Add metrics --- src/coreclr/jit/jitmetadatalist.h | 2 ++ src/coreclr/jit/morph.cpp | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/src/coreclr/jit/jitmetadatalist.h b/src/coreclr/jit/jitmetadatalist.h index 7ba0d732ab8645..db0f66f3f2c97e 100644 --- a/src/coreclr/jit/jitmetadatalist.h +++ b/src/coreclr/jit/jitmetadatalist.h @@ -68,6 +68,8 @@ JITMETADATAMETRIC(InlineAttempt, int, 0) JITMETADATAMETRIC(InlineCount, int, 0) JITMETADATAMETRIC(ProfileConsistentBeforeInline, int, 0) JITMETADATAMETRIC(ProfileConsistentAfterInline, int, 0) +JITMETADATAMETRIC(ProfileConsistentBeforeMorph, int, 0) +JITMETADATAMETRIC(ProfileConsistentAfterMorph, int, 0) JITMETADATAMETRIC(ProfileSynthesizedBlendedOrRepaired, int, 0) JITMETADATAMETRIC(ProfileInconsistentInitially, int, 0) JITMETADATAMETRIC(ProfileInconsistentResetLeave, int, 0) diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 4c08c903c97ad1..93ae773a27d172 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -13411,6 +13411,11 @@ PhaseStatus Compiler::fgMorphBlocks() // fgGlobalMorph = true; + if (fgPgoConsistent) + { + Metrics.ProfileConsistentBeforeMorph = 1; + } + if (opts.OptimizationEnabled()) { // Local assertion prop is enabled if we are optimizing. @@ -13547,6 +13552,11 @@ PhaseStatus Compiler::fgMorphBlocks() // may no longer be canonical. fgCanonicalizeFirstBB(); + if (fgPgoConsistent) + { + Metrics.ProfileConsistentAfterMorph = 1; + } + INDEBUG(fgPostGlobalMorphChecks();) return PhaseStatus::MODIFIED_EVERYTHING; From c9272ba6e691a0eacdd71dda1b5f0840c01d2332 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Thu, 9 Jan 2025 16:04:09 -0500 Subject: [PATCH 05/11] Remove whitespace change --- src/coreclr/jit/compiler.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index abdc0cad578ac0..5c6a0f662c22d8 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4808,7 +4808,6 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // Enable IR checks activePhaseChecks |= PhaseChecks::CHECK_IR; }; - DoPhase(this, PHASE_POST_MORPH, postMorphPhase); // GS security checks for unsafe buffers From b73271c6835083796002d57847ba9c200dcec72f Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Thu, 9 Jan 2025 17:30:43 -0500 Subject: [PATCH 06/11] Move profile checks to after loop inversion --- src/coreclr/jit/compiler.cpp | 15 ++++--- src/coreclr/jit/optimizer.cpp | 77 ++++++++++------------------------- 2 files changed, 32 insertions(+), 60 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 5c6a0f662c22d8..d5e3a7c58ff956 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4814,11 +4814,6 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // DoPhase(this, PHASE_GS_COOKIE, &Compiler::gsPhase); - // Drop back to just checking profile likelihoods. - // - activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE; - activePhaseChecks |= PhaseChecks::CHECK_LIKELIHOODS; - if (opts.OptimizationEnabled()) { // Compute the block weights @@ -4829,6 +4824,11 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // DoPhase(this, PHASE_INVERT_LOOPS, &Compiler::optInvertLoops); + // Drop back to just checking profile likelihoods. + // + activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE; + activePhaseChecks |= PhaseChecks::CHECK_LIKELIHOODS; + // Run some flow graph optimizations (but don't reorder) // DoPhase(this, PHASE_OPTIMIZE_FLOW, &Compiler::optOptimizeFlow); @@ -4877,6 +4877,11 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl DoPhase(this, PHASE_COMPUTE_DOMINATORS, &Compiler::fgComputeDominators); } + // Drop back to just checking profile likelihoods. + // + activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE; + activePhaseChecks |= PhaseChecks::CHECK_LIKELIHOODS; + #ifdef DEBUG fgDebugCheckLinks(); #endif diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index fbd05f5e8ea405..9a54e32f58cdfa 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -2020,6 +2020,7 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) } weight_t loopIterations = BB_LOOP_WEIGHT_SCALE; + bool haveProfileWeights = false; bool allProfileWeightsAreValid = false; weight_t const weightBlock = block->bbWeight; weight_t const weightTest = bTest->bbWeight; @@ -2040,6 +2041,8 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) return true; } + haveProfileWeights = true; + // We generally expect weightTest > weightTop // // Tolerate small inconsistencies... @@ -2203,25 +2206,6 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) // Flag the block that received the copy as potentially having various constructs. bNewCond->CopyFlags(bTest, BBF_COPY_PROPAGATE); - // Fix flow and profile - // - bNewCond->inheritWeight(block); - - if (allProfileWeightsAreValid) - { - weight_t const delta = weightTest - weightTop; - - // If there is just one outside edge incident on bTest, then ideally delta == block->bbWeight. - // But this might not be the case if profile data is inconsistent. - // - // And if bTest has multiple outside edges we want to account for the weight of them all. - // - if (delta > block->bbWeight) - { - bNewCond->setBBProfileWeight(delta); - } - } - // Update pred info // // For now we set the likelihood of the newCond branch to match @@ -2245,6 +2229,15 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) fgRedirectTargetEdge(block, bNewCond); assert(block->JumpsToNext()); + // Fix flow and profile + // + bNewCond->inheritWeight(block); + + if (haveProfileWeights) + { + bTest->decreaseBBProfileWeight(block->bbWeight); + } + // Move all predecessor edges that look like loop entry edges to point to the new cloned condition // block, not the existing condition block. The idea is that if we only move `block` to point to // `bNewCond`, but leave other `bTest` predecessors still pointing to `bTest`, when we eventually @@ -2256,8 +2249,9 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) // unsigned const loopFirstNum = bTop->bbNum; unsigned const loopBottomNum = bTest->bbNum; - for (BasicBlock* const predBlock : bTest->PredBlocksEditing()) + for (FlowEdge* const predEdge : bTest->PredEdgesEditing()) { + BasicBlock* const predBlock = predEdge->getSourceBlock(); unsigned const bNum = predBlock->bbNum; if ((loopFirstNum <= bNum) && (bNum <= loopBottomNum)) { @@ -2278,6 +2272,14 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) case BBJ_SWITCH: case BBJ_EHFINALLYRET: fgReplaceJumpTarget(predBlock, bTest, bNewCond); + + // Redirect profile weight, too + if (haveProfileWeights) + { + const weight_t edgeWeight = predEdge->getLikelyWeight(); + bNewCond->increaseBBProfileWeight(edgeWeight); + bTest->decreaseBBProfileWeight(edgeWeight); + } break; case BBJ_EHCATCHRET: @@ -2291,41 +2293,6 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) } } - // If we have profile data for all blocks and we know that we are cloning the - // `bTest` block into `bNewCond` and thus changing the control flow from `block` so - // that it no longer goes directly to `bTest` anymore, we have to adjust - // various weights. - // - if (allProfileWeightsAreValid) - { - // Update the weight for bTest. Normally, this reduces the weight of the bTest, except in odd - // cases of stress modes with inconsistent weights. - // - JITDUMP("Reducing profile weight of " FMT_BB " from " FMT_WT " to " FMT_WT "\n", bTest->bbNum, weightTest, - weightTop); - bTest->inheritWeight(bTop); - -#ifdef DEBUG - // If we're checking profile data, see if profile for the two target blocks is consistent. - // - if ((activePhaseChecks & PhaseChecks::CHECK_PROFILE) == PhaseChecks::CHECK_PROFILE) - { - if (JitConfig.JitProfileChecks() > 0) - { - const ProfileChecks checks = (ProfileChecks)JitConfig.JitProfileChecks(); - const bool nextProfileOk = fgDebugCheckIncomingProfileData(bNewCond->GetFalseTarget(), checks); - const bool jumpProfileOk = fgDebugCheckIncomingProfileData(bNewCond->GetTrueTarget(), checks); - - if (hasFlag(checks, ProfileChecks::RAISE_ASSERT)) - { - assert(nextProfileOk); - assert(jumpProfileOk); - } - } - } -#endif // DEBUG - } - #ifdef DEBUG if (verbose) { From 174e1bff2cab3ba987f45708930c49bb4535ebce Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Fri, 10 Jan 2025 13:54:54 -0500 Subject: [PATCH 07/11] Move profile checks to after flow opts --- src/coreclr/jit/compiler.cpp | 8 +-- src/coreclr/jit/fgehopt.cpp | 22 ++++++--- src/coreclr/jit/fgopt.cpp | 93 ++++++++++++++++++++--------------- src/coreclr/jit/optimizer.cpp | 17 +++---- 4 files changed, 79 insertions(+), 61 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index d5e3a7c58ff956..5f86930e8ab8ef 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4824,15 +4824,15 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // DoPhase(this, PHASE_INVERT_LOOPS, &Compiler::optInvertLoops); + // Run some flow graph optimizations (but don't reorder) + // + DoPhase(this, PHASE_OPTIMIZE_FLOW, &Compiler::optOptimizeFlow); + // Drop back to just checking profile likelihoods. // activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE; activePhaseChecks |= PhaseChecks::CHECK_LIKELIHOODS; - // Run some flow graph optimizations (but don't reorder) - // - DoPhase(this, PHASE_OPTIMIZE_FLOW, &Compiler::optOptimizeFlow); - // Second pass of tail merge // DoPhase(this, PHASE_HEAD_TAIL_MERGE2, [this]() { diff --git a/src/coreclr/jit/fgehopt.cpp b/src/coreclr/jit/fgehopt.cpp index 80e6f184254dbf..42091556ee9402 100644 --- a/src/coreclr/jit/fgehopt.cpp +++ b/src/coreclr/jit/fgehopt.cpp @@ -175,6 +175,12 @@ PhaseStatus Compiler::fgRemoveEmptyFinally() currentBlock->SetKind(BBJ_ALWAYS); currentBlock->RemoveFlags(BBF_RETLESS_CALL); // no longer a BBJ_CALLFINALLY + // Update profile data into postTryFinallyBlock + if (currentBlock->hasProfileWeight()) + { + postTryFinallyBlock->increaseBBProfileWeight(currentBlock->bbWeight); + } + // Cleanup the postTryFinallyBlock fgCleanupContinuation(postTryFinallyBlock); @@ -668,12 +674,6 @@ PhaseStatus Compiler::fgRemoveEmptyTry() fgPrepareCallFinallyRetForRemoval(leave); fgRemoveBlock(leave, /* unreachable */ true); - // Remove profile weight into the continuation block - if (continuation->hasProfileWeight()) - { - continuation->decreaseBBProfileWeight(leave->bbWeight); - } - // (3) Convert the callfinally to a normal jump to the handler assert(callFinally->HasInitializedTarget()); callFinally->SetKind(BBJ_ALWAYS); @@ -1670,6 +1670,16 @@ PhaseStatus Compiler::fgCloneFinally() } } + // Update flow into normalCallFinallyReturn + if (normalCallFinallyReturn->hasProfileWeight()) + { + normalCallFinallyReturn->bbWeight = BB_ZERO_WEIGHT; + for (FlowEdge* const predEdge : normalCallFinallyReturn->PredEdges()) + { + normalCallFinallyReturn->increaseBBProfileWeight(predEdge->getLikelyWeight()); + } + } + // Done! JITDUMP("\nDone with EH#%u\n\n", XTnum); cloneCount++; diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 0edee0df5e2cbc..e593cf7478b0e9 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -1259,8 +1259,25 @@ void Compiler::fgUnreachableBlock(BasicBlock* block) // Mark the block as removed block->SetFlags(BBF_REMOVED); - // Update bbRefs and bbPreds for the blocks reached by this block - fgRemoveBlockAsPred(block); + // Update bbRefs and bbPreds for this block's successors + bool profileInconsistent = false; + for (BasicBlock* const succBlock : block->Succs(this)) + { + FlowEdge* const succEdge = fgRemoveAllRefPreds(succBlock, block); + + if (block->hasProfileWeight() && succBlock->hasProfileWeight()) + { + succBlock->decreaseBBProfileWeight(succEdge->getLikelyWeight()); + profileInconsistent |= (succBlock->NumSucc() > 0); + } + } + + if (profileInconsistent) + { + JITDUMP("Flow removal of " FMT_BB " needs to be propagated. Data %s inconsistent.\n", block->bbNum, + fgPgoConsistent ? "is now" : "was already"); + fgPgoConsistent = false; + } } //------------------------------------------------------------- @@ -2592,13 +2609,13 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump) estDupCostSz += expr->GetCostSz(); } - bool allProfileWeightsAreValid = false; - weight_t weightJump = bJump->bbWeight; - weight_t weightDest = bDest->bbWeight; - weight_t weightNext = bJump->Next()->bbWeight; - bool rareJump = bJump->isRunRarely(); - bool rareDest = bDest->isRunRarely(); - bool rareNext = bJump->Next()->isRunRarely(); + bool haveProfileWeights = false; + weight_t weightJump = bJump->bbWeight; + weight_t weightDest = bDest->bbWeight; + weight_t weightNext = bJump->Next()->bbWeight; + bool rareJump = bJump->isRunRarely(); + bool rareDest = bDest->isRunRarely(); + bool rareNext = bJump->Next()->isRunRarely(); // If we have profile data then we calculate the number of time // the loop will iterate into loopIterations @@ -2611,7 +2628,7 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump) bDest->HasAnyFlag(BBF_PROF_WEIGHT | BBF_RUN_RARELY) && bJump->Next()->HasAnyFlag(BBF_PROF_WEIGHT | BBF_RUN_RARELY)) { - allProfileWeightsAreValid = true; + haveProfileWeights = true; if ((weightJump * 100) < weightDest) { @@ -2668,9 +2685,9 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump) if (verbose) { printf("\nDuplication of the conditional block " FMT_BB " (always branch from " FMT_BB - ") %s, because the cost of duplication (%i) is %s than %i, validProfileWeights = %s\n", + ") %s, because the cost of duplication (%i) is %s than %i, haveProfileWeights = %s\n", bDest->bbNum, bJump->bbNum, costIsTooHigh ? "not done" : "performed", estDupCostSz, - costIsTooHigh ? "greater" : "less or equal", maxDupCostSz, allProfileWeightsAreValid ? "true" : "false"); + costIsTooHigh ? "greater" : "less or equal", maxDupCostSz, dspBool(haveProfileWeights)); } #endif // DEBUG @@ -2772,7 +2789,8 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump) // bJump now falls through into the next block // - FlowEdge* const falseEdge = fgAddRefPred(bJump->Next(), bJump, destFalseEdge); + BasicBlock* const bDestFalseTarget = bJump->Next(); + FlowEdge* const falseEdge = fgAddRefPred(bDestFalseTarget, bJump, destFalseEdge); // bJump now jumps to bDest's normal jump target // @@ -2781,35 +2799,25 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump) bJump->SetCond(bJump->GetTargetEdge(), falseEdge); - if (weightJump > 0) + // Update profile data + // + if (haveProfileWeights) { - if (allProfileWeightsAreValid) - { - if (weightDest > weightJump) - { - bDest->bbWeight = (weightDest - weightJump); - } - else if (!bDest->isRunRarely()) - { - bDest->bbWeight = BB_UNITY_WEIGHT; - } - } - else - { - weight_t newWeightDest = 0; + // bJump no longer flows into bDest + // + bDest->decreaseBBProfileWeight(bJump->bbWeight); + bDestNormalTarget->decreaseBBProfileWeight(bJump->bbWeight * destFalseEdge->getLikelihood()); + bDestFalseTarget->decreaseBBProfileWeight(bJump->bbWeight * destTrueEdge->getLikelihood()); - if (weightDest > weightJump) - { - newWeightDest = (weightDest - weightJump); - } - if (weightDest >= (BB_LOOP_WEIGHT_SCALE * BB_UNITY_WEIGHT) / 2) - { - newWeightDest = (weightDest * 2) / (BB_LOOP_WEIGHT_SCALE * BB_UNITY_WEIGHT); - } - if (newWeightDest > 0) - { - bDest->bbWeight = newWeightDest; - } + // Propagate bJump's weight into its new successors + // + bDestNormalTarget->increaseBBProfileWeight(bJump->GetTrueEdge()->getLikelyWeight()); + bDestFalseTarget->increaseBBProfileWeight(falseEdge->getLikelyWeight()); + + if ((bDestNormalTarget->NumSucc() > 0) || (bDestFalseTarget->NumSucc() > 0)) + { + JITDUMP("fgOptimizeBranch: New flow out of " FMT_BB " needs to be propagated. Data %s inconsistent.\n", fgPgoConsistent ? "is now" : "was already"); + fgPgoConsistent = false; } } @@ -2939,6 +2947,9 @@ bool Compiler::fgOptimizeSwitchJumps() blockToTargetEdge->setLikelihood(fraction); blockToNewBlockEdge->setLikelihood(max(0.0, 1.0 - fraction)); + JITDUMP("fgOptimizeSwitchJumps: Updated flow into " FMT_BB " needs to be propagated. Data %s inconsistent.\n", newBlock->bbNum, fgPgoConsistent ? "is now" : "was already"); + fgPgoConsistent = false; + // For now we leave the switch as is, since there's no way // to indicate that one of the cases is now unreachable. // @@ -3168,7 +3179,7 @@ bool Compiler::fgExpandRarelyRunBlocks() // If block is not run rarely, then check to make sure that it has // at least one non-rarely run block. - if (!block->isRunRarely()) + if (!block->isRunRarely() && !block->isBBCallFinallyPairTail()) { bool rare = true; diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 9a54e32f58cdfa..3e32a876a9f836 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -2019,12 +2019,11 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) estDupCostSz += tree->GetCostSz(); } - weight_t loopIterations = BB_LOOP_WEIGHT_SCALE; - bool haveProfileWeights = false; - bool allProfileWeightsAreValid = false; - weight_t const weightBlock = block->bbWeight; - weight_t const weightTest = bTest->bbWeight; - weight_t const weightTop = bTop->bbWeight; + weight_t loopIterations = BB_LOOP_WEIGHT_SCALE; + bool haveProfileWeights = false; + weight_t const weightBlock = block->bbWeight; + weight_t const weightTest = bTest->bbWeight; + weight_t const weightTop = bTop->bbWeight; // If we have profile data then we calculate the number of times // the loop will iterate into loopIterations @@ -2054,8 +2053,6 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) } else { - allProfileWeightsAreValid = true; - // Determine average iteration count // // weightTop is the number of time this loop executes @@ -2152,10 +2149,10 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) // tree walk to count them was not done. printf( "\nDuplication of loop condition [%06u] is %s, because the cost of duplication (%i) is %s than %i," - "\n loopIterations = %7.3f, optInvertTotalInfo.sharedStaticHelperCount >= %d, validProfileWeights = %s\n", + "\n loopIterations = %7.3f, optInvertTotalInfo.sharedStaticHelperCount >= %d, haveProfileWeights = %s\n", dspTreeID(condTree), costIsTooHigh ? "not done" : "performed", estDupCostSz, costIsTooHigh ? "greater" : "less or equal", maxDupCostSz, loopIterations, - optInvertTotalInfo.sharedStaticHelperCount, dspBool(allProfileWeightsAreValid)); + optInvertTotalInfo.sharedStaticHelperCount, dspBool(haveProfileWeights)); } #endif From 22634d917739b49f8757fc9434821934b06d345a Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Fri, 10 Jan 2025 14:54:55 -0500 Subject: [PATCH 08/11] Move profile checks to after loop recognition --- src/coreclr/jit/compiler.cpp | 13 ++++++------- src/coreclr/jit/fgopt.cpp | 8 +++++--- src/coreclr/jit/optimizer.cpp | 2 +- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 5f86930e8ab8ef..599747c24f4187 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4828,11 +4828,6 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // DoPhase(this, PHASE_OPTIMIZE_FLOW, &Compiler::optOptimizeFlow); - // Drop back to just checking profile likelihoods. - // - activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE; - activePhaseChecks |= PhaseChecks::CHECK_LIKELIHOODS; - // Second pass of tail merge // DoPhase(this, PHASE_HEAD_TAIL_MERGE2, [this]() { @@ -4843,11 +4838,15 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // DoPhase(this, PHASE_DFS_BLOCKS3, &Compiler::fgDfsBlocksAndRemove); - // Discover and classify natural loops (e.g. mark iterative loops as such). Also marks loop blocks - // and sets bbWeight to the loop nesting levels. + // Discover and classify natural loops (e.g. mark iterative loops as such). // DoPhase(this, PHASE_FIND_LOOPS, &Compiler::optFindLoopsPhase); + // Drop back to just checking profile likelihoods. + // + activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE; + activePhaseChecks |= PhaseChecks::CHECK_LIKELIHOODS; + // Scale block weights and mark run rarely blocks. // DoPhase(this, PHASE_SET_BLOCK_WEIGHTS, &Compiler::optSetBlockWeights); diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index e593cf7478b0e9..5e6045978ab3ac 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -2790,7 +2790,7 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump) // bJump now falls through into the next block // BasicBlock* const bDestFalseTarget = bJump->Next(); - FlowEdge* const falseEdge = fgAddRefPred(bDestFalseTarget, bJump, destFalseEdge); + FlowEdge* const falseEdge = fgAddRefPred(bDestFalseTarget, bJump, destFalseEdge); // bJump now jumps to bDest's normal jump target // @@ -2816,7 +2816,8 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump) if ((bDestNormalTarget->NumSucc() > 0) || (bDestFalseTarget->NumSucc() > 0)) { - JITDUMP("fgOptimizeBranch: New flow out of " FMT_BB " needs to be propagated. Data %s inconsistent.\n", fgPgoConsistent ? "is now" : "was already"); + JITDUMP("fgOptimizeBranch: New flow out of " FMT_BB " needs to be propagated. Data %s inconsistent.\n", + fgPgoConsistent ? "is now" : "was already"); fgPgoConsistent = false; } } @@ -2947,7 +2948,8 @@ bool Compiler::fgOptimizeSwitchJumps() blockToTargetEdge->setLikelihood(fraction); blockToNewBlockEdge->setLikelihood(max(0.0, 1.0 - fraction)); - JITDUMP("fgOptimizeSwitchJumps: Updated flow into " FMT_BB " needs to be propagated. Data %s inconsistent.\n", newBlock->bbNum, fgPgoConsistent ? "is now" : "was already"); + JITDUMP("fgOptimizeSwitchJumps: Updated flow into " FMT_BB " needs to be propagated. Data %s inconsistent.\n", + newBlock->bbNum, fgPgoConsistent ? "is now" : "was already"); fgPgoConsistent = false; // For now we leave the switch as is, since there's no way diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 3e32a876a9f836..0cc84439b6a14b 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -2249,7 +2249,7 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) for (FlowEdge* const predEdge : bTest->PredEdgesEditing()) { BasicBlock* const predBlock = predEdge->getSourceBlock(); - unsigned const bNum = predBlock->bbNum; + unsigned const bNum = predBlock->bbNum; if ((loopFirstNum <= bNum) && (bNum <= loopBottomNum)) { // Looks like the predecessor is from within the potential loop; skip it. From 5dbbb8f956913180f4feff067c544a828035c399 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Tue, 14 Jan 2025 15:56:57 -0500 Subject: [PATCH 09/11] Re-enable profile consistency checks --- src/coreclr/jit/compiler.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 56e7344fa65708..a107b8464c11bb 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4814,11 +4814,6 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // DoPhase(this, PHASE_GS_COOKIE, &Compiler::gsPhase); - // Drop back to just checking profile likelihoods. - // - activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE; - activePhaseChecks |= PhaseChecks::CHECK_LIKELIHOODS; - if (opts.OptimizationEnabled()) { // Compute the block weights From 6462e260406ccc8555d21ba361b8c024d304923a Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Tue, 14 Jan 2025 23:18:29 -0500 Subject: [PATCH 10/11] Fix inconsistency in loop finding, block weight phases --- src/coreclr/jit/fgprofile.cpp | 3 +++ src/coreclr/jit/optimizer.cpp | 8 ++++++++ 2 files changed, 11 insertions(+) diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp index 830c8fe0ce7779..9cf332bf16c995 100644 --- a/src/coreclr/jit/fgprofile.cpp +++ b/src/coreclr/jit/fgprofile.cpp @@ -4463,6 +4463,9 @@ bool Compiler::fgComputeCalledCount(weight_t returnWeight) { fgFirstBB->setBBProfileWeight(fgCalledCount); madeChanges = true; + JITDUMP("fgComputeCalledCount: Modified method entry weight. Data %s inconsistent.\n", + fgPgoConsistent ? "is now" : "was already"); + fgPgoConsistent = false; } #if DEBUG diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 3d286945e9253b..6adb49eed59eba 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -2905,6 +2905,14 @@ bool Compiler::optCreatePreheader(FlowGraphNaturalLoop* loop) optSetWeightForPreheaderOrExit(loop, preheader); + if (preheader->hasProfileWeight() && preheader->hasEHBoundaryIn()) + { + JITDUMP("optCreatePreheader: " FMT_BB + " is not reachable via normal flow, so skip checking its entry weight. Data %s inconsistent.\n", + preheader->bbNum, fgPgoConsistent ? "is now" : "was already"); + fgPgoConsistent = false; + } + return true; } From 68cc25ab87935f759e9a12326f4eb47178ea3859 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Wed, 15 Jan 2025 13:12:49 -0500 Subject: [PATCH 11/11] Move profile checks to after loop opts --- src/coreclr/jit/compiler.cpp | 5 ----- src/coreclr/jit/loopcloning.cpp | 26 +++++++++----------------- 2 files changed, 9 insertions(+), 22 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index a107b8464c11bb..6a6a47a68ba28e 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4854,11 +4854,6 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // DoPhase(this, PHASE_FIND_LOOPS, &Compiler::optFindLoopsPhase); - // Drop back to just checking profile likelihoods. - // - activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE; - activePhaseChecks |= PhaseChecks::CHECK_LIKELIHOODS; - // Scale block weights and mark run rarely blocks. // DoPhase(this, PHASE_SET_BLOCK_WEIGHTS, &Compiler::optSetBlockWeights); diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index 0f6b681420a6da..f1350004dd4064 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -2011,21 +2011,8 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex bool cloneLoopsWithEH = false; INDEBUG(cloneLoopsWithEH = (JitConfig.JitCloneLoopsWithEH() > 0);) - // Determine the depth of the loop, so we can properly weight blocks added (outside the cloned loop blocks). - unsigned depth = loop->GetDepth(); - weight_t ambientWeight = 1; - for (unsigned j = 0; j < depth; j++) - { - weight_t lastWeight = ambientWeight; - ambientWeight *= BB_LOOP_WEIGHT_SCALE; - assert(ambientWeight > lastWeight); - } - assert(loop->EntryEdges().size() == 1); BasicBlock* preheader = loop->EntryEdge(0)->getSourceBlock(); - // The ambient weight might be higher than we computed above. Be safe by - // taking the max with the head block's weight. - ambientWeight = max(ambientWeight, preheader->bbWeight); // We're going to transform this loop: // @@ -2043,8 +2030,7 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex BasicBlock* fastPreheader = fgNewBBafter(BBJ_ALWAYS, preheader, /*extendRegion*/ true); JITDUMP("Adding " FMT_BB " after " FMT_BB "\n", fastPreheader->bbNum, preheader->bbNum); - fastPreheader->bbWeight = preheader->isRunRarely() ? BB_ZERO_WEIGHT : ambientWeight; - fastPreheader->CopyFlags(preheader, (BBF_PROF_WEIGHT | BBF_RUN_RARELY)); + fastPreheader->inheritWeight(preheader); assert(preheader->KindIs(BBJ_ALWAYS)); assert(preheader->TargetIs(loop->GetHeader())); @@ -2081,8 +2067,7 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex const bool extendRegion = BasicBlock::sameEHRegion(bottom, preheader); BasicBlock* slowPreheader = fgNewBBafter(BBJ_ALWAYS, newPred, extendRegion); JITDUMP("Adding " FMT_BB " after " FMT_BB "\n", slowPreheader->bbNum, newPred->bbNum); - slowPreheader->bbWeight = newPred->isRunRarely() ? BB_ZERO_WEIGHT : ambientWeight; - slowPreheader->CopyFlags(newPred, (BBF_PROF_WEIGHT | BBF_RUN_RARELY)); + slowPreheader->inheritWeight(newPred); slowPreheader->scaleBBWeight(LoopCloneContext::slowPathWeightScaleFactor); // If we didn't extend the region above (because the last loop @@ -3082,6 +3067,13 @@ PhaseStatus Compiler::optCloneLoops() m_dfsTree = fgComputeDfs(); m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); } + + if (fgIsUsingProfileWeights()) + { + JITDUMP("optCloneLoops: Profile data needs to be propagated through new loops. Data %s inconsistent.\n", + fgPgoConsistent ? "is now" : "was already"); + fgPgoConsistent = false; + } } #ifdef DEBUG