From c403a06cf58e373d54187b0855e96a8794a88096 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 3 May 2024 18:12:36 -0700 Subject: [PATCH] JIT: profile checking through inlining (#101834) Advance profile consistency check through inlining. Turns out there are five reasons why inlining may make profile data inconsistent. Account for these and add metrics. Also add separate metrics for consistency before and after inlining, since pre-inline phases are run on inlinees and so don't give us good insight into overall consistency rates. And add some metrics for inlining itself. Contributes to #93020. Co-authored-by: Aman Khalid --- src/coreclr/jit/compiler.cpp | 10 +- src/coreclr/jit/compiler.h | 3 +- src/coreclr/jit/fgbasic.cpp | 38 ++--- src/coreclr/jit/fginline.cpp | 150 +++++++++++++++++++- src/coreclr/jit/fgprofile.cpp | 41 ++++-- src/coreclr/jit/importer.cpp | 28 ++-- src/coreclr/jit/indirectcalltransformer.cpp | 10 +- src/coreclr/jit/inline.h | 6 + src/coreclr/jit/jitmetadatalist.h | 10 ++ 9 files changed, 252 insertions(+), 44 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 8e82707adbbbfd..0c812ffd935dbb 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4697,11 +4697,6 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl return; } - // Drop back to just checking profile likelihoods. - // - activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE; - activePhaseChecks |= PhaseChecks::CHECK_LIKELIHOODS; - // At this point in the phase list, all the inlinee phases have // been run, and inlinee compiles have exited, so we should only // get this far if we are jitting the root method. @@ -4718,6 +4713,11 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // Record "start" values for post-inlining cycles and elapsed time. RecordStateAtEndOfInlining(); + // Drop back to just checking profile likelihoods. + // + activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE; + activePhaseChecks |= PhaseChecks::CHECK_LIKELIHOODS; + // Transform each GT_ALLOCOBJ node into either an allocation helper call or // local variable allocation on the stack. ObjectAllocator objectAllocator(this); // PHASE_ALLOCATE_OBJECTS diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 050f2e001ea294..1c6950a55296cc 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5284,6 +5284,7 @@ class Compiler // The number of separate return points in the method. unsigned fgReturnCount; + unsigned fgThrowCount; PhaseStatus fgAddInternal(); @@ -6228,7 +6229,7 @@ class Compiler void fgLinkBasicBlocks(); - unsigned fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, FixedBitVect* jumpTarget); + void fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, FixedBitVect* jumpTarget); void fgCheckBasicBlockControlFlow(); diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp index af02d257d5dc29..538c49e530ecb1 100644 --- a/src/coreclr/jit/fgbasic.cpp +++ b/src/coreclr/jit/fgbasic.cpp @@ -50,6 +50,7 @@ void Compiler::fgInit() fgDomBBcount = 0; fgBBVarSetsInited = false; fgReturnCount = 0; + fgThrowCount = 0; m_dfsTree = nullptr; m_loops = nullptr; @@ -233,8 +234,9 @@ bool Compiler::fgEnsureFirstBBisScratch() { // If the result is clearly nonsensical, just inherit // - JITDUMP("\fgEnsureFirstBBisScratch: Profile data could not be locally repaired. Data %s inconsisent.\n", - fgPgoConsistent ? "is now" : "was already"); + JITDUMP( + "\fgEnsureFirstBBisScratch: Profile data could not be locally repaired. Data %s inconsistent.\n", + fgPgoConsistent ? "is now" : "was already"); if (fgPgoConsistent) { @@ -3099,19 +3101,18 @@ void Compiler::fgLinkBasicBlocks() // codeSize -- length of the IL stream // jumpTarget -- [in] bit vector of jump targets found by fgFindJumpTargets // -// Returns: -// number of return blocks (BBJ_RETURN) in the method (may be zero) -// // Notes: -// Invoked for prejited and jitted methods, and for all inlinees - -unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, FixedBitVect* jumpTarget) +// Invoked for prejitted and jitted methods, and for all inlinees. +// Sets fgReturnCount and fgThrowCount +// +void Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, FixedBitVect* jumpTarget) { - unsigned retBlocks = 0; - const BYTE* codeBegp = codeAddr; - const BYTE* codeEndp = codeAddr + codeSize; - bool tailCall = false; - unsigned curBBoffs = 0; + unsigned retBlocks = 0; + unsigned throwBlocks = 0; + const BYTE* codeBegp = codeAddr; + const BYTE* codeEndp = codeAddr + codeSize; + bool tailCall = false; + unsigned curBBoffs = 0; BasicBlock* curBBdesc; // Keep track of where we are in the scope lists, as we will also @@ -3312,7 +3313,8 @@ unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, F // can be dispatched as tail calls from the caller. compInlineResult->NoteFatal(InlineObservation::CALLEE_EXPLICIT_TAIL_PREFIX); retBlocks++; - return retBlocks; + fgReturnCount = retBlocks; + return; } FALLTHROUGH; @@ -3415,6 +3417,7 @@ unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, F case CEE_THROW: case CEE_RETHROW: + throwBlocks++; jmpKind = BBJ_THROW; break; @@ -3597,7 +3600,8 @@ unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, F fgLinkBasicBlocks(); - return retBlocks; + fgReturnCount = retBlocks; + fgThrowCount = throwBlocks; } /***************************************************************************** @@ -3709,7 +3713,7 @@ void Compiler::fgFindBasicBlocks() /* Now create the basic blocks */ - fgReturnCount = fgMakeBasicBlocks(info.compCode, info.compILCodeSize, jumpTarget); + fgMakeBasicBlocks(info.compCode, info.compILCodeSize, jumpTarget); if (compIsForInlining()) { @@ -4269,7 +4273,7 @@ void Compiler::fgFixEntryFlowForOSR() // if ((fgEntryBB->bbPreds != nullptr) && (fgEntryBB != fgOSREntryBB)) { - JITDUMP("OSR: profile data could not be locally repaired. Data %s inconsisent.\n", + JITDUMP("OSR: profile data could not be locally repaired. Data %s inconsistent.\n", fgPgoConsistent ? "is now" : "was already"); fgPgoConsistent = false; } diff --git a/src/coreclr/jit/fginline.cpp b/src/coreclr/jit/fginline.cpp index 8fcea689c6c993..7a6246a988fda3 100644 --- a/src/coreclr/jit/fginline.cpp +++ b/src/coreclr/jit/fginline.cpp @@ -624,24 +624,86 @@ class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitordspTreeID(tree), block->bbNum); + m_compiler->Metrics.InlinerBranchFold++; // We have a constant operand, and should have the all clear to optimize. // Update side effects on the tree, assert there aren't any, and bash to nop. m_compiler->gtUpdateNodeSideEffects(tree); assert((tree->gtFlags & GTF_SIDE_EFFECT) == 0); tree->gtBashToNOP(); - m_madeChanges = true; + m_madeChanges = true; + FlowEdge* removedEdge = nullptr; if (condTree->IsIntegralConst(0)) { - m_compiler->fgRemoveRefPred(block->GetTrueEdge()); + removedEdge = block->GetTrueEdge(); + m_compiler->fgRemoveRefPred(removedEdge); block->SetKindAndTargetEdge(BBJ_ALWAYS, block->GetFalseEdge()); } else { - m_compiler->fgRemoveRefPred(block->GetFalseEdge()); + removedEdge = block->GetFalseEdge(); + m_compiler->fgRemoveRefPred(removedEdge); block->SetKindAndTargetEdge(BBJ_ALWAYS, block->GetTrueEdge()); } + + // Update profile; make it consistent if possible + // + if (block->hasProfileWeight()) + { + bool repairWasComplete = true; + weight_t const weight = removedEdge->getLikelyWeight(); + + if (weight > 0) + { + // Target block weight will increase. + // + BasicBlock* const target = block->GetTarget(); + assert(target->hasProfileWeight()); + target->setBBProfileWeight(target->bbWeight + weight); + + // Alternate weight will decrease + // + BasicBlock* const alternate = removedEdge->getDestinationBlock(); + assert(alternate->hasProfileWeight()); + weight_t const alternateNewWeight = alternate->bbWeight - weight; + + // If profile weights are consistent, expect at worst a slight underflow. + // + if (m_compiler->fgPgoConsistent && (alternateNewWeight < 0)) + { + assert(m_compiler->fgProfileWeightsEqual(alternateNewWeight, 0)); + } + alternate->setBBProfileWeight(max(0.0, alternateNewWeight)); + + // This will affect profile transitively, so in general + // the profile will become inconsistent. + // + repairWasComplete = false; + + // But we can check for the special case where the + // block's postdominator is target's target (simple + // if/then/else/join). + // + if (target->KindIs(BBJ_ALWAYS)) + { + repairWasComplete = + alternate->KindIs(BBJ_ALWAYS) && alternate->TargetIs(target->GetTarget()); + } + } + + if (!repairWasComplete) + { + JITDUMP("Profile data could not be locally repaired. Data %s inconsistent.\n", + m_compiler->fgPgoConsistent ? "is now" : "was already"); + + if (m_compiler->fgPgoConsistent) + { + m_compiler->Metrics.ProfileInconsistentInlinerBranchFold++; + m_compiler->fgPgoConsistent = false; + } + } + } } } else @@ -692,6 +754,11 @@ PhaseStatus Compiler::fgInline() JitConfig.JitPrintInlinedMethods().contains(info.compMethodHnd, info.compClassHnd, &info.compMethodInfo->args); #endif // DEBUG + if (fgPgoConsistent) + { + Metrics.ProfileConsistentBeforeInline++; + } + noway_assert(fgFirstBB != nullptr); BasicBlock* block = fgFirstBB; @@ -822,6 +889,14 @@ PhaseStatus Compiler::fgInline() fgRenumberBlocks(); } + if (fgPgoConsistent) + { + Metrics.ProfileConsistentAfterInline++; + } + + Metrics.InlineCount = m_inlineStrategy->GetInlineCount(); + Metrics.InlineAttempt = m_inlineStrategy->GetImportCount(); + return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } @@ -1611,6 +1686,75 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo) } #endif + // Update profile consistency + // + // If inlinee is inconsistent, root method will be inconsistent too. + // + if (!InlineeCompiler->fgPgoConsistent) + { + if (fgPgoConsistent) + { + JITDUMP("INLINER: profile data in root now inconsistent -- inlinee had inconsistency\n"); + Metrics.ProfileInconsistentInlinee++; + fgPgoConsistent = false; + } + } + + // If we inline a no-return call at a site with profile weight, + // we will introduce inconsistency. + // + if (InlineeCompiler->fgReturnCount == 0) + { + JITDUMP("INLINER: no-return inlinee\n"); + + if (iciBlock->bbWeight > 0) + { + if (fgPgoConsistent) + { + JITDUMP("INLINER: profile data in root now inconsistent -- no-return inlinee at call site in " FMT_BB + " with weight " FMT_WT "\n", + iciBlock->bbNum, iciBlock->bbWeight); + Metrics.ProfileInconsistentNoReturnInlinee++; + fgPgoConsistent = false; + } + } + else + { + // Inlinee scaling should assure this is so. + // + assert(InlineeCompiler->fgFirstBB->bbWeight == 0); + } + } + + // If the call site is not in a try and the callee has a throw, + // we may introduce inconsistency. + // + // Technically we should check if the callee has a throw not in a try, but since + // we can't inline methods with EH yet we don't see those. + // + if (InlineeCompiler->fgThrowCount > 0) + { + JITDUMP("INLINER: may-throw inlinee\n"); + + if (iciBlock->bbWeight > 0) + { + if (fgPgoConsistent) + { + JITDUMP("INLINER: profile data in root now inconsistent -- may-throw inlinee at call site in " FMT_BB + " with weight " FMT_WT "\n", + iciBlock->bbNum, iciBlock->bbWeight); + Metrics.ProfileInconsistentMayThrowInlinee++; + fgPgoConsistent = false; + } + } + else + { + // Inlinee scaling should assure this is so. + // + assert(InlineeCompiler->fgFirstBB->bbWeight == 0); + } + } + // If an inlinee needs GS cookie we need to make sure that the cookie will not be allocated at zero stack offset. // Note that if the root method needs GS cookie then this has already been taken care of. if (!getNeedsGSSecurityCookie() && InlineeCompiler->getNeedsGSSecurityCookie()) diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp index 6d7ef484fe9a87..1afd6bcda76316 100644 --- a/src/coreclr/jit/fgprofile.cpp +++ b/src/coreclr/jit/fgprofile.cpp @@ -141,22 +141,39 @@ void Compiler::fgApplyProfileScale() JITDUMP(" ... no callee profile data, will use non-pgo weight to scale\n"); } - // Ostensibly this should be fgCalledCount for the callee, but that's not available - // as it requires some analysis. + // Determine the weight of the first block preds, if any. + // (only happens if the first block is a loop head). // - // For most callees it will be the same as the entry block count. - // - // Note when/if we early do normalization this may need to change. + weight_t firstBlockPredWeight = 0; + for (FlowEdge* const firstBlockPred : fgFirstBB->PredEdges()) + { + firstBlockPredWeight += firstBlockPred->getLikelyWeight(); + } + + // Determine the "input" weight for the callee // weight_t calleeWeight = fgFirstBB->bbWeight; - // Callee entry weight is nonzero? + // Callee entry weight is zero or negative (taking backedges into account)? // If so, just choose the smallest plausible weight. // - if (calleeWeight == BB_ZERO_WEIGHT) + if (calleeWeight <= firstBlockPredWeight) { calleeWeight = fgHaveProfileWeights() ? 1.0 : BB_UNITY_WEIGHT; - JITDUMP(" ... callee entry has weight zero, will use weight of " FMT_WT " to scale\n", calleeWeight); + JITDUMP(" ... callee entry has zero or negative weight, will use weight of " FMT_WT " to scale\n", + calleeWeight); + JITDUMP("Profile data could not be scaled consistently. Data %s inconsistent.\n", + fgPgoConsistent ? "is now" : "was already"); + + if (fgPgoConsistent) + { + Metrics.ProfileInconsistentInlineeScale++; + fgPgoConsistent = false; + } + } + else + { + calleeWeight -= firstBlockPredWeight; } // Call site has profile weight? @@ -2829,6 +2846,14 @@ PhaseStatus Compiler::fgInstrumentMethod() // PhaseStatus Compiler::fgIncorporateProfileData() { + // For now we only rely on profile data when optimizing. + // + if (!opts.OptimizationEnabled()) + { + JITDUMP("not optimizing, so not incorporating any profile data\n"); + return PhaseStatus::MODIFIED_NOTHING; + } + // Are we doing profile stress? // if (fgStressBBProf() > 0) diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 4998dbd6293fa9..97aa8af7f2d5f8 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -5136,10 +5136,14 @@ void Compiler::impResetLeaveBlock(BasicBlock* block, unsigned jmpAddr) // We are unlikely to be able to repair the profile. // For now we don't even try. // - JITDUMP("\nimpResetLeaveBlock: Profile data could not be locally repaired. Data %s inconsisent.\n", + JITDUMP("\nimpResetLeaveBlock: Profile data could not be locally repaired. Data %s inconsistent.\n", fgPgoConsistent ? "is now" : "was already"); - fgPgoConsistent = false; - Metrics.ProfileInconsistentResetLeave++; + + if (fgPgoConsistent) + { + Metrics.ProfileInconsistentResetLeave++; + fgPgoConsistent = false; + } } } @@ -7371,8 +7375,12 @@ void Compiler::impImportBlockCode(BasicBlock* block) { JITDUMP("Profile data could not be locally repaired. Data %s inconsistent.\n", fgPgoConsistent ? "is now" : "was already"); - fgPgoConsistent = false; - Metrics.ProfileInconsistentImporterBranchFold++; + + if (fgPgoConsistent) + { + Metrics.ProfileInconsistentImporterBranchFold++; + fgPgoConsistent = false; + } } } } @@ -7657,10 +7665,14 @@ void Compiler::impImportBlockCode(BasicBlock* block) // We are unlikely to be able to repair the profile. // For now we don't even try. // - JITDUMP("Profile data could not be locally repaired. Data %s inconsisent.\n", + JITDUMP("Profile data could not be locally repaired. Data %s inconsistent.\n", fgPgoConsistent ? "is now" : "was already"); - fgPgoConsistent = false; - Metrics.ProfileInconsistentImporterSwitchFold++; + + if (fgPgoConsistent) + { + Metrics.ProfileInconsistentImporterSwitchFold++; + fgPgoConsistent = false; + } } // Create a NOP node diff --git a/src/coreclr/jit/indirectcalltransformer.cpp b/src/coreclr/jit/indirectcalltransformer.cpp index 3f0c311e84a614..92cac9245fc638 100644 --- a/src/coreclr/jit/indirectcalltransformer.cpp +++ b/src/coreclr/jit/indirectcalltransformer.cpp @@ -1248,8 +1248,14 @@ class IndirectCallTransformer if (!isReasonableUnderflow) { - compiler->fgPgoConsistent = false; - compiler->Metrics.ProfileInconsistentChainedGDV++; + JITDUMP("Profile data could not be locally repaired. Data %s inconsistent.\n", + compiler->fgPgoConsistent ? "is now" : "was already"); + + if (compiler->fgPgoConsistent) + { + compiler->Metrics.ProfileInconsistentChainedGDV++; + compiler->fgPgoConsistent = false; + } } } diff --git a/src/coreclr/jit/inline.h b/src/coreclr/jit/inline.h index 8c1cb56124ad2e..a2aeba6fed7254 100644 --- a/src/coreclr/jit/inline.h +++ b/src/coreclr/jit/inline.h @@ -993,6 +993,12 @@ class InlineStrategy m_ImportCount++; } + // Return number of import attempts + unsigned GetImportCount() const + { + return m_ImportCount; + } + // Inform strategy about the inline decision for a prejit root void NotePrejitDecision(const InlineResult& r) { diff --git a/src/coreclr/jit/jitmetadatalist.h b/src/coreclr/jit/jitmetadatalist.h index 43afa6384d4c67..e077f9f57b2559 100644 --- a/src/coreclr/jit/jitmetadatalist.h +++ b/src/coreclr/jit/jitmetadatalist.h @@ -53,6 +53,11 @@ JITMETADATAMETRIC(ClassGDV, int, 0) JITMETADATAMETRIC(MethodGDV, int, 0) JITMETADATAMETRIC(MultiGuessGDV, int, 0) JITMETADATAMETRIC(ChainedGDV, int, 0) +JITMETADATAMETRIC(InlinerBranchFold, int, 0) +JITMETADATAMETRIC(InlineAttempt, int, 0) +JITMETADATAMETRIC(InlineCount, int, 0) +JITMETADATAMETRIC(ProfileConsistentBeforeInline, int, 0) +JITMETADATAMETRIC(ProfileConsistentAfterInline, int, 0) JITMETADATAMETRIC(ProfileSynthesizedBlendedOrRepaired, int, 0) JITMETADATAMETRIC(ProfileInconsistentInitially, int, 0) JITMETADATAMETRIC(ProfileInconsistentResetLeave, int, 0) @@ -60,6 +65,11 @@ JITMETADATAMETRIC(ProfileInconsistentImporterBranchFold, int, 0) JITMETADATAMETRIC(ProfileInconsistentImporterSwitchFold, int, 0) JITMETADATAMETRIC(ProfileInconsistentChainedGDV, int, 0) JITMETADATAMETRIC(ProfileInconsistentScratchBB, int, 0) +JITMETADATAMETRIC(ProfileInconsistentInlinerBranchFold, int, 0) +JITMETADATAMETRIC(ProfileInconsistentInlineeScale, int, 0) +JITMETADATAMETRIC(ProfileInconsistentInlinee, int, 0) +JITMETADATAMETRIC(ProfileInconsistentNoReturnInlinee, int, 0) +JITMETADATAMETRIC(ProfileInconsistentMayThrowInlinee, int, 0) #undef JITMETADATA #undef JITMETADATAINFO