From 8a22b878d004cfa55994af9672c7cfecd5357969 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 11 Feb 2025 10:32:09 +0100 Subject: [PATCH] JIT: Optimize struct parameter register accesses in the backend (#110819) This PR adds an optimization in lowering to utilize the new parameter register to local mappings added in #110795. The optimization detects IR that is going to result in stack spills/loads and instead replaces them with scalar locals that will be able to stay in registers. Physical promotion benefits especially from this as it creates the kind of IR that the optimization ends up kicking in for. The heuristics of physical promotion are updated to account for the fact that the backend is now able to do this optimization, making physical promotion more likely to promote struct parameters. --- src/coreclr/jit/codegen.h | 2 + src/coreclr/jit/codegenarm.cpp | 24 +++ src/coreclr/jit/codegencommon.cpp | 31 +++- src/coreclr/jit/compiler.cpp | 2 +- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/compiler.hpp | 5 +- src/coreclr/jit/lclvars.cpp | 20 +- src/coreclr/jit/lower.cpp | 297 ++++++++++++++++++++++++++++-- src/coreclr/jit/lower.h | 6 +- src/coreclr/jit/lsra.cpp | 3 +- src/coreclr/jit/lsraarmarch.cpp | 12 +- src/coreclr/jit/lsrabuild.cpp | 40 ++-- src/coreclr/jit/promotion.cpp | 66 ++++++- 13 files changed, 469 insertions(+), 41 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index b8f4d99353208a..2520864b707421 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -444,6 +444,8 @@ class CodeGen final : public CodeGenInterface void genPopFltRegs(regMaskTP regMask); regMaskTP genStackAllocRegisterMask(unsigned frameSize, regMaskTP maskCalleeSavedFloat); + regMaskTP genPrespilledUnmappedRegs(); + regMaskTP genJmpCallArgMask(); void genFreeLclFrame(unsigned frameSize, diff --git a/src/coreclr/jit/codegenarm.cpp b/src/coreclr/jit/codegenarm.cpp index 92d6bc8635224e..26975c0130ab9c 100644 --- a/src/coreclr/jit/codegenarm.cpp +++ b/src/coreclr/jit/codegenarm.cpp @@ -2102,6 +2102,30 @@ regMaskTP CodeGen::genStackAllocRegisterMask(unsigned frameSize, regMaskTP maskC } } +//----------------------------------------------------------------------------------- +// genPrespilledUnmappedRegs: Get a mask of the registers that are prespilled +// and also not mapped to any locals. +// +// Returns: +// Mask of those registers. These registers can be used safely in prolog as +// they won't be needed after prespilling. +// +regMaskTP CodeGen::genPrespilledUnmappedRegs() +{ + regMaskTP regs = regSet.rsMaskPreSpillRegs(false); + + if (compiler->m_paramRegLocalMappings != nullptr) + { + for (int i = 0; i < compiler->m_paramRegLocalMappings->Height(); i++) + { + const ParameterRegisterLocalMapping& mapping = compiler->m_paramRegLocalMappings->BottomRef(i); + regs &= ~mapping.RegisterSegment->GetRegisterMask(); + } + } + + return regs; +} + //----------------------------------------------------------------------------------- // instGen_MemoryBarrier: Emit a MemoryBarrier instruction // diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 2318d4223ad812..5b280951178aff 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -3411,6 +3411,12 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) // top of the underlying registers. RegGraph graph(compiler); + // Add everything to the graph, or spill directly to stack when needed. + // Note that some registers may be homed in multiple (stack) places. + // Particularly if there is a mapping to a local that does not share its + // (stack) home with the parameter local, in which case we will home it + // both into the parameter local's stack home (if it is used), but also to + // the mapping target. for (unsigned lclNum = 0; lclNum < compiler->info.compArgsCount; lclNum++) { LclVarDsc* lclDsc = compiler->lvaGetDesc(lclNum); @@ -3426,11 +3432,26 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) const ParameterRegisterLocalMapping* mapping = compiler->FindParameterRegisterLocalMappingByRegister(segment.GetRegister()); + bool spillToBaseLocal = true; if (mapping != nullptr) { genSpillOrAddRegisterParam(mapping->LclNum, mapping->Offset, lclNum, segment, &graph); + + // If home is shared with base local, then skip spilling to the + // base local. + if (lclDsc->lvPromoted) + { + spillToBaseLocal = false; + } } - else + +#ifdef TARGET_ARM + // For arm32 the spills to the base local happen as part of + // prespilling sometimes, so skip it in that case. + spillToBaseLocal &= (regSet.rsMaskPreSpillRegs(false) & segment.GetRegisterMask()) == 0; +#endif + + if (spillToBaseLocal) { genSpillOrAddRegisterParam(lclNum, segment.Offset, lclNum, segment, &graph); } @@ -3915,7 +3936,7 @@ void CodeGen::genCheckUseBlockInit() // must force spill R4/R5/R6 so that we can use them during // zero-initialization process. // - int forceSpillRegCount = genCountBits(maskCalleeRegArgMask & ~regSet.rsMaskPreSpillRegs(false)) - 1; + int forceSpillRegCount = genCountBits(maskCalleeRegArgMask & ~genPrespilledUnmappedRegs()) - 1; if (forceSpillRegCount > 0) regSet.rsSetRegsModified(RBM_R4); if (forceSpillRegCount > 1) @@ -5347,7 +5368,7 @@ void CodeGen::genFnProlog() // These registers will be available to use for the initReg. We just remove // all of these registers from the rsCalleeRegArgMaskLiveIn. // - intRegState.rsCalleeRegArgMaskLiveIn &= ~regSet.rsMaskPreSpillRegs(false); + intRegState.rsCalleeRegArgMaskLiveIn &= ~genPrespilledUnmappedRegs(); #endif /* Choose the register to use for zero initialization */ @@ -5751,6 +5772,10 @@ void CodeGen::genFnProlog() #else genEnregisterOSRArgsAndLocals(); #endif + // OSR functions take no parameters in registers. Ensure no mappings + // are present. + // assert((compiler->m_paramRegLocalMappings == nullptr) || compiler->m_paramRegLocalMappings->Empty()); + compiler->lvaUpdateArgsWithInitialReg(); } else diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 811fa2d56035a9..ed6852d138bd17 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -10419,7 +10419,7 @@ JITDBGAPI void __cdecl dVN(ValueNum vn) cVN(JitTls::GetCompiler(), vn); } -JITDBGAPI void __cdecl dRegMask(regMaskTP mask) +JITDBGAPI void __cdecl dRegMask(const regMaskTP& mask) { static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called printf("===================================================================== dRegMask %u\n", sequenceNumber++); diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 2f5d219ed94d43..e3077cfac9ac12 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -4202,7 +4202,7 @@ class Compiler #ifdef DEBUG void lvaDumpRegLocation(unsigned lclNum); - void lvaDumpFrameLocation(unsigned lclNum); + void lvaDumpFrameLocation(unsigned lclNum, int minLength); void lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t refCntWtdWidth = 6); void lvaTableDump(FrameLayoutState curState = NO_FRAME_LAYOUT); // NO_FRAME_LAYOUT means use the current frame // layout state defined by lvaDoneFrameLayout diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index ef00369ea1b309..1738810e149b4b 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -4260,8 +4260,9 @@ inline void Compiler::CLR_API_Leave(API_ICorJitInfo_Names ename) bool Compiler::fgVarIsNeverZeroInitializedInProlog(unsigned varNum) { LclVarDsc* varDsc = lvaGetDesc(varNum); - bool result = varDsc->lvIsParam || lvaIsOSRLocal(varNum) || (varNum == lvaGSSecurityCookie) || - (varNum == lvaInlinedPInvokeFrameVar) || (varNum == lvaStubArgumentVar) || (varNum == lvaRetAddrVar); + bool result = varDsc->lvIsParam || varDsc->lvIsParamRegTarget || lvaIsOSRLocal(varNum) || + (varNum == lvaGSSecurityCookie) || (varNum == lvaInlinedPInvokeFrameVar) || + (varNum == lvaStubArgumentVar) || (varNum == lvaRetAddrVar); #ifdef TARGET_ARM64 result = result || (varNum == lvaFfrRegister); diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index f9d84a12bc9a28..5b2308634fad83 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -4939,8 +4939,8 @@ void Compiler::lvaComputeRefCounts(bool isRecompute, bool setSlotNumbers) // that was set by past phases. if (!isRecompute) { - varDsc->lvSingleDef = varDsc->lvIsParam; - varDsc->lvSingleDefRegCandidate = varDsc->lvIsParam; + varDsc->lvSingleDef = varDsc->lvIsParam || varDsc->lvIsParamRegTarget; + varDsc->lvSingleDefRegCandidate = varDsc->lvIsParam || varDsc->lvIsParamRegTarget; varDsc->lvAllDefsAreNoGc = (varDsc->lvImplicitlyReferenced == false); } @@ -5033,6 +5033,11 @@ void Compiler::lvaComputeRefCounts(bool isRecompute, bool setSlotNumbers) varDsc->incRefCnts(BB_UNITY_WEIGHT, this); } } + else if (varDsc->lvIsParamRegTarget && (varDsc->lvRefCnt() > 0)) + { + varDsc->incRefCnts(BB_UNITY_WEIGHT, this); + varDsc->incRefCnts(BB_UNITY_WEIGHT, this); + } // If we have JMP, all arguments must have a location // even if we don't use them inside the method @@ -7370,7 +7375,7 @@ void Compiler::lvaDumpRegLocation(unsigned lclNum) * in its home location. */ -void Compiler::lvaDumpFrameLocation(unsigned lclNum) +void Compiler::lvaDumpFrameLocation(unsigned lclNum, int minLength) { int offset; regNumber baseReg; @@ -7383,7 +7388,12 @@ void Compiler::lvaDumpFrameLocation(unsigned lclNum) baseReg = EBPbased ? REG_FPBASE : REG_SPBASE; #endif - printf("[%2s%1s0x%02X] ", getRegName(baseReg), (offset < 0 ? "-" : "+"), (offset < 0 ? -offset : offset)); + int printed = + printf("[%2s%1s0x%02X] ", getRegName(baseReg), (offset < 0 ? "-" : "+"), (offset < 0 ? -offset : offset)); + if ((printed >= 0) && (printed < minLength)) + { + printf("%*s", minLength - printed, ""); + } } /***************************************************************************** @@ -7474,7 +7484,7 @@ void Compiler::lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t r // location. Otherwise, it's always on the stack. if (lvaDoneFrameLayout != NO_FRAME_LAYOUT) { - lvaDumpFrameLocation(lclNum); + lvaDumpFrameLocation(lclNum, (int)strlen("zero-ref ")); } } } diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index eee9a5859ecb43..dfd489eac21b88 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7901,6 +7901,11 @@ PhaseStatus Lowering::DoPhase() comp->lvSetMinOptsDoNotEnreg(); } + if (comp->opts.OptimizationEnabled()) + { + MapParameterRegisterLocals(); + } + for (BasicBlock* const block : comp->Blocks()) { /* Make the block publicly available */ @@ -7916,11 +7921,6 @@ PhaseStatus Lowering::DoPhase() LowerBlock(block); } - if (comp->opts.OptimizationEnabled()) - { - MapParameterRegisterLocals(); - } - #ifdef DEBUG JITDUMP("Lower has completed modifying nodes.\n"); if (VERBOSE) @@ -8030,12 +8030,16 @@ void Lowering::MapParameterRegisterLocals() comp->m_paramRegLocalMappings->Emplace(&segment, fieldLclNum, segment.Offset - fieldDsc->lvFldOffset); } - LclVarDsc* fieldLclDsc = comp->lvaGetDesc(fieldLclNum); - assert(!fieldLclDsc->lvIsParamRegTarget); - fieldLclDsc->lvIsParamRegTarget = true; + assert(!fieldDsc->lvIsParamRegTarget); + fieldDsc->lvIsParamRegTarget = true; } } + if (!comp->opts.IsOSR()) + { + FindInducedParameterRegisterLocals(); + } + #ifdef DEBUG if (comp->verbose) { @@ -8043,14 +8047,285 @@ void Lowering::MapParameterRegisterLocals() for (int i = 0; i < comp->m_paramRegLocalMappings->Height(); i++) { const ParameterRegisterLocalMapping& mapping = comp->m_paramRegLocalMappings->BottomRef(i); - printf(" "); - mapping.RegisterSegment->Dump(); - printf(" -> V%02u\n", mapping.LclNum); + printf(" %s -> V%02u+%u\n", getRegName(mapping.RegisterSegment->GetRegister()), mapping.LclNum, + mapping.Offset); } } #endif } +//------------------------------------------------------------------------ +// Lowering::FindInducedParameterRegisterLocals: +// Find locals that would be profitable to map from parameter registers, +// based on IR in the initialization block. +// +void Lowering::FindInducedParameterRegisterLocals() +{ +#ifdef TARGET_ARM + // On arm32 the profiler hook does not preserve arg registers, so + // parameters are prespilled and cannot stay enregistered. + if (comp->compIsProfilerHookNeeded()) + { + JITDUMP("Skipping FindInducedParameterRegisterLocals on arm32 with profiler hook\n"); + return; + } +#endif + + // Check if we possibly have any parameters we can induce new register + // locals from. + bool anyCandidates = false; + for (unsigned lclNum = 0; lclNum < comp->info.compArgsCount; lclNum++) + { + LclVarDsc* lcl = comp->lvaGetDesc(lclNum); + if (lcl->lvPromoted || !lcl->lvDoNotEnregister) + { + continue; + } + + const ABIPassingInformation& abiInfo = comp->lvaGetParameterABIInfo(lclNum); + if (!abiInfo.HasAnyRegisterSegment()) + { + continue; + } + + anyCandidates = true; + break; + } + + if (!anyCandidates) + { + return; + } + + bool hasRegisterKill = false; + LocalSet storedToLocals(comp->getAllocator(CMK_ABI)); + // Now look for optimization opportunities in the first block: places where + // we read fields out of struct parameters that can be mapped cleanly. This + // is frequently created by physical promotion. + for (GenTree* node : LIR::AsRange(comp->fgFirstBB)) + { + hasRegisterKill |= node->IsCall(); + + GenTreeLclVarCommon* storeLcl; + if (node->DefinesLocal(comp, &storeLcl)) + { + storedToLocals.Emplace(storeLcl->GetLclNum(), true); + continue; + } + + if (node->OperIs(GT_LCL_ADDR)) + { + storedToLocals.Emplace(node->AsLclVarCommon()->GetLclNum(), true); + continue; + } + + if (!node->OperIs(GT_LCL_FLD)) + { + continue; + } + + GenTreeLclFld* fld = node->AsLclFld(); + if (fld->GetLclNum() >= comp->info.compArgsCount) + { + continue; + } + + LclVarDsc* paramDsc = comp->lvaGetDesc(fld); + if (paramDsc->lvPromoted) + { + // These are complicated to reason about since they may be + // defined/used through their fields, so just skip them. + continue; + } + + if (fld->TypeIs(TYP_STRUCT)) + { + continue; + } + + if (storedToLocals.Lookup(fld->GetLclNum())) + { + // LCL_FLD does not necessarily take the value of the parameter + // anymore. + continue; + } + + const ABIPassingInformation& dataAbiInfo = comp->lvaGetParameterABIInfo(fld->GetLclNum()); + const ABIPassingSegment* regSegment = nullptr; + for (const ABIPassingSegment& segment : dataAbiInfo.Segments()) + { + if (!segment.IsPassedInRegister()) + { + continue; + } + + assert(fld->GetLclOffs() <= comp->lvaLclExactSize(fld->GetLclNum())); + unsigned structAccessedSize = + min(genTypeSize(fld), comp->lvaLclExactSize(fld->GetLclNum()) - fld->GetLclOffs()); + if ((segment.Offset != fld->GetLclOffs()) || (structAccessedSize > segment.Size) || + (varTypeUsesIntReg(fld) != genIsValidIntReg(segment.GetRegister()))) + { + continue; + } + + // This is a match, but check if it is already remapped. + // TODO-CQ: If it is already remapped, we can reuse the value from + // the remapping. + if (comp->FindParameterRegisterLocalMappingByRegister(segment.GetRegister()) == nullptr) + { + regSegment = &segment; + } + + break; + } + + if (regSegment == nullptr) + { + continue; + } + + JITDUMP("LCL_FLD use [%06u] of unenregisterable parameter corresponds to ", Compiler::dspTreeID(fld)); + DBEXEC(VERBOSE, regSegment->Dump()); + JITDUMP("\n"); + + // Now see if we want to introduce a new local for this value, or if we + // can reuse one because this is the source of a store (frequently + // created by physical promotion). + LIR::Use use; + if (!LIR::AsRange(comp->fgFirstBB).TryGetUse(fld, &use)) + { + JITDUMP(" ..but no use was found\n"); + continue; + } + + unsigned remappedLclNum = TryReuseLocalForParameterAccess(use, storedToLocals); + + if (remappedLclNum == BAD_VAR_NUM) + { + // If we have seen register kills then avoid creating a new local. + // The local is going to have to move from the parameter register + // into a callee saved register, and the callee saved register will + // need to be saved/restored to/from stack anyway. + if (hasRegisterKill) + { + JITDUMP(" ..but use happens after a call and is deemed unprofitable to create a local for\n"); + continue; + } + + remappedLclNum = comp->lvaGrabTemp( + false DEBUGARG(comp->printfAlloc("V%02u.%s", fld->GetLclNum(), getRegName(regSegment->GetRegister())))); + comp->lvaGetDesc(remappedLclNum)->lvType = fld->TypeGet(); + JITDUMP("Created new local V%02u for the mapping\n", remappedLclNum); + } + else + { + JITDUMP("Reusing local V%02u for store from struct parameter register %s. Store:\n", remappedLclNum, + getRegName(regSegment->GetRegister())); + DISPTREERANGE(LIR::AsRange(comp->fgFirstBB), use.User()); + + // The store will be a no-op, so get rid of it + LIR::AsRange(comp->fgFirstBB).Remove(use.User(), true); + use = LIR::Use(); + +#ifdef DEBUG + LclVarDsc* remappedLclDsc = comp->lvaGetDesc(remappedLclNum); + remappedLclDsc->lvReason = + comp->printfAlloc("%s%sV%02u.%s", remappedLclDsc->lvReason == nullptr ? "" : remappedLclDsc->lvReason, + remappedLclDsc->lvReason == nullptr ? "" : " ", fld->GetLclNum(), + getRegName(regSegment->GetRegister())); +#endif + } + + comp->m_paramRegLocalMappings->Emplace(regSegment, remappedLclNum, 0); + comp->lvaGetDesc(remappedLclNum)->lvIsParamRegTarget = true; + + JITDUMP("New mapping: "); + DBEXEC(VERBOSE, regSegment->Dump()); + JITDUMP(" -> V%02u\n", remappedLclNum); + + // Insert explicit normalization for small types (the LCL_FLD we + // are replacing comes with this normalization). + if (varTypeIsSmall(fld)) + { + GenTree* lcl = comp->gtNewLclvNode(remappedLclNum, genActualType(fld)); + GenTree* normalizeLcl = comp->gtNewCastNode(TYP_INT, lcl, false, fld->TypeGet()); + GenTree* storeNormalizedLcl = comp->gtNewStoreLclVarNode(remappedLclNum, normalizeLcl); + LIR::AsRange(comp->fgFirstBB).InsertAtBeginning(LIR::SeqTree(comp, storeNormalizedLcl)); + + JITDUMP("Parameter normalization:\n"); + DISPTREERANGE(LIR::AsRange(comp->fgFirstBB), storeNormalizedLcl); + } + + // If we still have a valid use, then replace the LCL_FLD with a + // LCL_VAR of the remapped parameter register local. + if (use.IsInitialized()) + { + GenTree* lcl = comp->gtNewLclvNode(remappedLclNum, genActualType(fld)); + LIR::AsRange(comp->fgFirstBB).InsertAfter(fld, lcl); + use.ReplaceWith(lcl); + LowerNode(lcl); + JITDUMP("New user tree range:\n"); + DISPTREERANGE(LIR::AsRange(comp->fgFirstBB), use.User()); + } + + fld->gtBashToNOP(); + } +} + +//------------------------------------------------------------------------ +// Lowering::TryReuseLocalForParameterAccess: +// Try to figure out if a LCL_FLD that corresponds to a parameter register is +// being stored directly to a LCL_VAR, and in that case whether it would be +// profitable to reuse that local as the parameter register. +// +// Parameters: +// use - The use of the LCL_FLD +// storedToLocals - Map of locals that have had potential definitions to them +// up until the use +// +// Returns: +// The local number to reuse, or BAD_VAR_NUM to create a new local instead. +// +unsigned Lowering::TryReuseLocalForParameterAccess(const LIR::Use& use, const LocalSet& storedToLocals) +{ + GenTree* useNode = use.User(); + + if (!useNode->OperIs(GT_STORE_LCL_VAR)) + { + return BAD_VAR_NUM; + } + + LclVarDsc* destLclDsc = comp->lvaGetDesc(useNode->AsLclVarCommon()); + + if (destLclDsc->lvIsParamRegTarget) + { + return BAD_VAR_NUM; + } + + if (destLclDsc->lvIsStructField) + { + return BAD_VAR_NUM; + } + + if (destLclDsc->TypeGet() == TYP_STRUCT) + { + return BAD_VAR_NUM; + } + + if (destLclDsc->lvDoNotEnregister) + { + return BAD_VAR_NUM; + } + + if (storedToLocals.Lookup(useNode->AsLclVarCommon()->GetLclNum())) + { + // Destination may change value before this access + return BAD_VAR_NUM; + } + + return useNode->AsLclVarCommon()->GetLclNum(); +} + #ifdef DEBUG //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index 7fa3aca9e98511..86b336c517f3c1 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -133,7 +133,11 @@ class Lowering final : public Phase static bool CheckBlock(Compiler* compiler, BasicBlock* block); #endif // DEBUG - void MapParameterRegisterLocals(); + typedef JitHashTable, bool> LocalSet; + + void MapParameterRegisterLocals(); + void FindInducedParameterRegisterLocals(); + unsigned TryReuseLocalForParameterAccess(const LIR::Use& use, const LocalSet& storedToLocals); void LowerBlock(BasicBlock* block); GenTree* LowerNode(GenTree* node); diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index dc4f2c312045e4..3958dd2df29d77 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -1484,7 +1484,8 @@ void LinearScan::identifyCandidatesExceptionDataflow() assert(varDsc->lvLiveInOutOfHndlr); - if (varTypeIsGC(varDsc) && VarSetOps::IsMember(compiler, finallyVars, varIndex) && !varDsc->lvIsParam) + if (varTypeIsGC(varDsc) && VarSetOps::IsMember(compiler, finallyVars, varIndex) && !varDsc->lvIsParam && + !varDsc->lvIsParamRegTarget) { assert(varDsc->lvMustInit); } diff --git a/src/coreclr/jit/lsraarmarch.cpp b/src/coreclr/jit/lsraarmarch.cpp index 518ae815f867c8..b3c6c7d4cf788d 100644 --- a/src/coreclr/jit/lsraarmarch.cpp +++ b/src/coreclr/jit/lsraarmarch.cpp @@ -457,13 +457,19 @@ int LinearScan::BuildPutArgSplit(GenTreePutArgSplit* argNode) // go into registers. for (unsigned regIndex = 0; regIndex < currentRegCount; regIndex++) { - SingleTypeRegSet sourceMask = RBM_NONE; if (sourceRegCount < argNode->gtNumRegs) { - sourceMask = genSingleTypeRegMask((regNumber)((unsigned)argReg + sourceRegCount)); + regNumber nextArgReg = (regNumber)((unsigned)argReg + sourceRegCount); + SingleTypeRegSet sourceMask = genSingleTypeRegMask(nextArgReg); + BuildUse(node, sourceMask, regIndex); + placedArgRegs.AddRegNumInMask(nextArgReg); } + else + { + BuildUse(node, RBM_NONE, regIndex); + } + sourceRegCount++; - BuildUse(node, sourceMask, regIndex); } } srcCount += sourceRegCount; diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 25964ec22f24fb..a3a0ad88b80f4a 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2066,7 +2066,7 @@ void LinearScan::insertZeroInitRefPositions() while (iter.NextElem(&varIndex)) { LclVarDsc* varDsc = compiler->lvaGetDescByTrackedIndex(varIndex); - if (!varDsc->lvIsParam && isCandidateVar(varDsc)) + if (!varDsc->lvIsParam && !varDsc->lvIsParamRegTarget && isCandidateVar(varDsc)) { JITDUMP("V%02u is a finally var:", compiler->lvaTrackedIndexToLclNum(varIndex)); Interval* interval = getIntervalForLocalVar(varIndex); @@ -2188,6 +2188,7 @@ void LinearScan::buildIntervals() // locals we are expecting to store the registers into in the prolog. for (unsigned lclNum = 0; lclNum < compiler->info.compArgsCount; lclNum++) { + LclVarDsc* lcl = compiler->lvaGetDesc(lclNum); const ABIPassingInformation& abiInfo = compiler->lvaGetParameterABIInfo(lclNum); for (const ABIPassingSegment& seg : abiInfo.Segments()) { @@ -2199,18 +2200,35 @@ void LinearScan::buildIntervals() const ParameterRegisterLocalMapping* mapping = compiler->FindParameterRegisterLocalMappingByRegister(seg.GetRegister()); - unsigned mappedLclNum = mapping != nullptr ? mapping->LclNum : lclNum; - JITDUMP("Arg V%02u in reg %s\n", mappedLclNum, getRegName(seg.GetRegister())); - LclVarDsc* argDsc = compiler->lvaGetDesc(mappedLclNum); - if (argDsc->lvTracked && !compiler->compJmpOpUsed && (argDsc->lvRefCnt() == 0) && - !compiler->opts.compDbgCode) + bool isParameterLive = !lcl->lvTracked || compiler->compJmpOpUsed || (lcl->lvRefCnt() != 0); + bool isLive; + if (mapping != nullptr) { - // Not live - continue; + LclVarDsc* mappedLcl = compiler->lvaGetDesc(mapping->LclNum); + bool isMappedLclLive = !mappedLcl->lvTracked || compiler->compJmpOpUsed || (mappedLcl->lvRefCnt() != 0); + if (mappedLcl->lvIsStructField) + { + // Struct fields are not saved into their parameter local + isLive = isMappedLclLive; + } + else + { + isLive = isParameterLive || isMappedLclLive; + } + } + else + { + isLive = isParameterLive; } - RegState* regState = genIsValidFloatReg(seg.GetRegister()) ? floatRegState : intRegState; - regState->rsCalleeRegArgMaskLiveIn |= seg.GetRegisterMask(); + JITDUMP("Arg V%02u is %s in reg %s\n", mapping != nullptr ? mapping->LclNum : lclNum, + isLive ? "live" : "dead", getRegName(seg.GetRegister())); + + if (isLive) + { + RegState* regState = genIsValidFloatReg(seg.GetRegister()) ? floatRegState : intRegState; + regState->rsCalleeRegArgMaskLiveIn |= seg.GetRegisterMask(); + } } } @@ -4468,7 +4486,7 @@ int LinearScan::BuildPutArgReg(GenTreeUnOp* node) SingleTypeRegSet argMask = genSingleTypeRegMask(argReg); RefPosition* use = BuildUse(op1, argMask); - // Record that this register is occupied by a register now. + // Record that this register is occupied by an argument now. placedArgRegs.AddRegNumInMask(argReg); if (supportsSpecialPutArg() && isCandidateLocalRef(op1) && ((op1->gtFlags & GTF_VAR_DEATH) == 0)) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 43ae688490615e..7db809bc5cadf8 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -767,12 +767,34 @@ class LocalUses unsigned countReadBacks = 0; weight_t countReadBacksWtd = 0; - // For parameters or OSR locals we always need one read back. - if (lcl->lvIsParam || lcl->lvIsOSRLocal) + + // For OSR locals we always need one read back. + if (lcl->lvIsOSRLocal) { countReadBacks++; countReadBacksWtd += comp->fgFirstBB->getBBWeight(comp); } + else if (lcl->lvIsParam) + { + // For parameters, the backend may be able to map it directly from a register. + if (MapsToRegister(comp, access, lclNum)) + { + // No promotion will result in a store to stack in the prolog. + costWithout += COST_STRUCT_ACCESS_CYCLES * comp->fgFirstBB->getBBWeight(comp); + sizeWithout += COST_STRUCT_ACCESS_SIZE; + + // Promotion we cost like the normal reg accesses above + costWith += COST_REG_ACCESS_CYCLES * comp->fgFirstBB->getBBWeight(comp); + sizeWith += COST_REG_ACCESS_SIZE; + } + else + { + // Otherwise we expect no prolog work to be required if we + // don't promote, and we need a read back from the stack. + countReadBacks++; + countReadBacksWtd += comp->fgFirstBB->getBBWeight(comp); + } + } // If the struct is stored from a call (either due to a multireg // return or by being passed as the retbuffer) then we need a readback @@ -1000,6 +1022,46 @@ class LocalUses return nullptr; } + + //------------------------------------------------------------------------ + // MapsToRegister: + // Check if a specific access in the specified parameter local is + // expected to map to a register. + // + // Parameters: + // comp - Compiler instance + // access - Access in the local + // lclNum - Parameter lcl num + // + // Returns: + // Pointer to a matching access, or nullptr if no match was found. + // + bool MapsToRegister(Compiler* comp, const Access& access, unsigned lclNum) + { + assert(lclNum < comp->info.compArgsCount); + + if (comp->lvaIsImplicitByRefLocal(lclNum)) + { + return false; + } + + const ABIPassingInformation& abiInfo = comp->lvaGetParameterABIInfo(lclNum); + if (abiInfo.HasAnyStackSegment()) + { + return false; + } + + for (const ABIPassingSegment& seg : abiInfo.Segments()) + { + if ((access.Offset == seg.Offset) && (genTypeSize(access.AccessType) == seg.Size) && + (varTypeUsesIntReg(access.AccessType) == genIsValidIntReg(seg.GetRegister()))) + { + return true; + } + } + + return false; + } }; // Struct used to save all struct stores involving physical promotion candidates.