From 794536f577e81d7a556dc79a54e0a12a97f8fee7 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 17 Dec 2024 14:49:12 +0100 Subject: [PATCH 01/35] Foo --- src/coreclr/jit/abi.cpp | 25 ++-- src/coreclr/jit/abi.h | 4 + src/coreclr/jit/codegen.h | 2 +- src/coreclr/jit/codegencommon.cpp | 117 ++++++++--------- src/coreclr/jit/compiler.cpp | 54 ++++++++ src/coreclr/jit/compiler.h | 30 ++++- src/coreclr/jit/liveness.cpp | 4 +- src/coreclr/jit/lower.cpp | 179 +++++++++++++++++++++++++ src/coreclr/jit/lower.h | 2 + src/coreclr/jit/lsra.h | 14 -- src/coreclr/jit/lsrabuild.cpp | 209 +++++------------------------- 11 files changed, 375 insertions(+), 265 deletions(-) diff --git a/src/coreclr/jit/abi.cpp b/src/coreclr/jit/abi.cpp index 04043c359246e5..f31b14531aec8c 100644 --- a/src/coreclr/jit/abi.cpp +++ b/src/coreclr/jit/abi.cpp @@ -465,15 +465,24 @@ void ABIPassingInformation::Dump() const } const ABIPassingSegment& seg = Segment(i); + seg.Dump(); + printf("\n"); + } +} - if (seg.IsPassedInRegister()) - { - printf("[%02u..%02u) reg %s\n", seg.Offset, seg.Offset + seg.Size, getRegName(seg.GetRegister())); - } - else - { - printf("[%02u..%02u) stack @ +%02u\n", seg.Offset, seg.Offset + seg.Size, seg.GetStackOffset()); - } +//----------------------------------------------------------------------------- +// Dump: +// Dump the ABIPassingSegment to stdout. +// +void ABIPassingSegment::Dump() const +{ + if (IsPassedInRegister()) + { + printf("[%02u..%02u) reg %s", Offset, Offset + Size, getRegName(GetRegister())); + } + else + { + printf("[%02u..%02u) stack @ +%02u", Offset, Offset + Size, GetStackOffset()); } } #endif diff --git a/src/coreclr/jit/abi.h b/src/coreclr/jit/abi.h index 66566f44db5fa6..e1851ca8a6e92f 100644 --- a/src/coreclr/jit/abi.h +++ b/src/coreclr/jit/abi.h @@ -38,6 +38,10 @@ class ABIPassingSegment static ABIPassingSegment InRegister(regNumber reg, unsigned offset, unsigned size); static ABIPassingSegment OnStack(unsigned stackOffset, unsigned offset, unsigned size); + +#ifdef DEBUG + void Dump() const; +#endif }; class ABIPassingSegmentIterator diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 411a97129d4b74..8d6d2ef5684cc4 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -260,7 +260,7 @@ class CodeGen final : public CodeGenInterface regMaskTP genGetParameterHomingTempRegisterCandidates(); var_types genParamStackType(LclVarDsc* dsc, const ABIPassingSegment& seg); - void genSpillOrAddRegisterParam(unsigned lclNum, class RegGraph* graph); + void genSpillOrAddRegisterParam(unsigned lclNum, const ABIPassingSegment& seg, class RegGraph* graph); void genSpillOrAddNonStandardRegisterParam(unsigned lclNum, regNumber sourceReg, class RegGraph* graph); void genEnregisterIncomingStackArgs(); #if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 928fa6a6459b5d..eb9a40aa095094 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -3239,10 +3239,11 @@ var_types CodeGen::genParamStackType(LclVarDsc* dsc, const ABIPassingSegment& se // to stack immediately, or by adding it to the register graph. // // Parameters: -// lclNum - Parameter local (or field of it) -// graph - The register graph to add to +// lclNum - Parameter local (or field of it) +// segment - Register segment to either spill or put in the register graph +// graph - The register graph to add to // -void CodeGen::genSpillOrAddRegisterParam(unsigned lclNum, RegGraph* graph) +void CodeGen::genSpillOrAddRegisterParam(unsigned lclNum, const ABIPassingSegment& segment, RegGraph* graph) { regMaskTP paramRegs = intRegState.rsCalleeRegArgMaskLiveIn | floatRegState.rsCalleeRegArgMaskLiveIn; LclVarDsc* varDsc = compiler->lvaGetDesc(lclNum); @@ -3250,72 +3251,59 @@ void CodeGen::genSpillOrAddRegisterParam(unsigned lclNum, RegGraph* graph) unsigned baseOffset = varDsc->lvIsStructField ? varDsc->lvFldOffset : 0; unsigned size = varDsc->lvExactSize(); - unsigned paramLclNum = varDsc->lvIsStructField ? varDsc->lvParentLcl : lclNum; - LclVarDsc* paramVarDsc = compiler->lvaGetDesc(paramLclNum); - const ABIPassingInformation& abiInfo = compiler->lvaGetParameterABIInfo(paramLclNum); - for (const ABIPassingSegment& seg : abiInfo.Segments()) + if (!segment.IsPassedInRegister() || ((paramRegs & genRegMask(segment.GetRegister())) == 0)) { - if (!seg.IsPassedInRegister() || ((paramRegs & genRegMask(seg.GetRegister())) == 0)) - { - continue; - } + return; + } - if (seg.Offset + seg.Size <= baseOffset) - { - continue; - } + if (varDsc->lvOnFrame && (!varDsc->lvIsInReg() || varDsc->lvLiveInOutOfHndlr)) + { + unsigned paramLclNum = varDsc->lvIsStructField ? varDsc->lvParentLcl : lclNum; + LclVarDsc* paramVarDsc = compiler->lvaGetDesc(paramLclNum); - if (baseOffset + size <= seg.Offset) + var_types storeType = genParamStackType(paramVarDsc, segment); + if ((varDsc->TypeGet() != TYP_STRUCT) && (genTypeSize(genActualType(varDsc)) < genTypeSize(storeType))) { - continue; + // Can happen for struct fields due to padding. + storeType = genActualType(varDsc); } - if (varDsc->lvOnFrame && (!varDsc->lvIsInReg() || varDsc->lvLiveInOutOfHndlr)) - { - var_types storeType = genParamStackType(paramVarDsc, seg); - if ((varDsc->TypeGet() != TYP_STRUCT) && (genTypeSize(genActualType(varDsc)) < genTypeSize(storeType))) - { - // Can happen for struct fields due to padding. - storeType = genActualType(varDsc); - } - - GetEmitter()->emitIns_S_R(ins_Store(storeType), emitActualTypeSize(storeType), seg.GetRegister(), lclNum, - seg.Offset - baseOffset); - } + GetEmitter()->emitIns_S_R(ins_Store(storeType), emitActualTypeSize(storeType), segment.GetRegister(), lclNum, + segment.Offset - baseOffset); + } - if (!varDsc->lvIsInReg()) - { - continue; - } + if (!varDsc->lvIsInReg()) + { + return; + } - var_types edgeType = genActualType(varDsc->GetRegisterType()); - // Some parameters can be passed in multiple registers but enregistered - // in a single one (e.g. SIMD types on arm64). In this case the edges - // we add here represent insertions of each element. - if (seg.Size < genTypeSize(edgeType)) - { - edgeType = seg.GetRegisterType(); - } + var_types edgeType = genActualType(varDsc->GetRegisterType()); + // Some parameters can be passed in multiple registers but enregistered + // in a single one (e.g. SIMD types on arm64). In this case the edges + // we add here represent insertions of each element. + if (segment.Size < genTypeSize(edgeType)) + { + edgeType = segment.GetRegisterType(); + } - RegNode* sourceReg = graph->GetOrAdd(seg.GetRegister()); - RegNode* destReg = graph->GetOrAdd(varDsc->GetRegNum()); + RegNode* sourceReg = graph->GetOrAdd(segment.GetRegister()); + RegNode* destReg = graph->GetOrAdd(varDsc->GetRegNum()); - if ((sourceReg != destReg) || (baseOffset != seg.Offset)) - { + if ((sourceReg != destReg) || (baseOffset != segment.Offset)) + { #ifdef TARGET_ARM - if (edgeType == TYP_DOUBLE) - { - assert(seg.Offset == baseOffset); - graph->AddEdge(sourceReg, destReg, TYP_FLOAT, 0); + if (edgeType == TYP_DOUBLE) + { + assert(segment.Offset == baseOffset); + graph->AddEdge(sourceReg, destReg, TYP_FLOAT, 0); - sourceReg = graph->GetOrAdd(REG_NEXT(sourceReg->reg)); - destReg = graph->GetOrAdd(REG_NEXT(destReg->reg)); - graph->AddEdge(sourceReg, destReg, TYP_FLOAT, 0); - continue; - } -#endif - graph->AddEdge(sourceReg, destReg, edgeType, seg.Offset - baseOffset); + sourceReg = graph->GetOrAdd(REG_NEXT(sourceReg->reg)); + destReg = graph->GetOrAdd(REG_NEXT(destReg->reg)); + graph->AddEdge(sourceReg, destReg, TYP_FLOAT, 0); + return; } +#endif + graph->AddEdge(sourceReg, destReg, edgeType, segment.Offset - baseOffset); } } @@ -3409,18 +3397,19 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) for (unsigned lclNum = 0; lclNum < compiler->info.compArgsCount; lclNum++) { LclVarDsc* lclDsc = compiler->lvaGetDesc(lclNum); + const ABIPassingInformation& abiInfo = compiler->lvaGetParameterABIInfo(lclNum); - if (compiler->lvaGetPromotionType(lclNum) == Compiler::PROMOTION_TYPE_INDEPENDENT) + for (const ABIPassingSegment& segment : abiInfo.Segments()) { - for (unsigned fld = 0; fld < lclDsc->lvFieldCnt; fld++) + if (!segment.IsPassedInRegister()) { - unsigned fieldLclNum = lclDsc->lvFieldLclStart + fld; - genSpillOrAddRegisterParam(fieldLclNum, &graph); + continue; } - } - else - { - genSpillOrAddRegisterParam(lclNum, &graph); + + const RegisterParameterLocalMapping* mapping = compiler->FindParameterRegisterLocalMappingByRegister(segment.GetRegister()); + + unsigned fieldLclNum = mapping != nullptr ? mapping->LclNum : lclNum; + genSpillOrAddRegisterParam(fieldLclNum, segment, &graph); } } diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index e2bae6d02fd67f..42cfc101e934b3 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4221,6 +4221,60 @@ bool Compiler::compRsvdRegCheck(FrameLayoutState curState) } #endif // TARGET_ARMARCH || TARGET_RISCV64 +//------------------------------------------------------------------------ +// FindParameterRegisterLocalMappingByRegister: +// Try to find a mapping that maps a particular parameter register to an +// incoming defined local. +// +// Returns: +// The mapping, or nullptr if no mapping was found for this register. +// +const RegisterParameterLocalMapping* Compiler::FindParameterRegisterLocalMappingByRegister(regNumber reg) +{ + if (m_regParamLocalMappings == nullptr) + { + return nullptr; + } + + for (int i = 0; i < m_regParamLocalMappings->Height(); i++) + { + const RegisterParameterLocalMapping& mapping = m_regParamLocalMappings->BottomRef(i); + if (mapping.RegisterSegment->GetRegister() == reg) + { + return &mapping; + } + } + + return nullptr; +} + +//------------------------------------------------------------------------ +// FindParameterRegisterLocalMappingByLocal: +// Try to find a mapping that maps a particular local from an incoming +// parameter register. +// +// Returns: +// The mapping, or nullptr if no mapping was found for this local. +// +const RegisterParameterLocalMapping* Compiler::FindParameterRegisterLocalMappingByLocal(unsigned lclNum) +{ + if (m_regParamLocalMappings == nullptr) + { + return nullptr; + } + + for (int i = 0; i < m_regParamLocalMappings->Height(); i++) + { + const RegisterParameterLocalMapping& mapping = m_regParamLocalMappings->BottomRef(i); + if (mapping.LclNum == lclNum) + { + return &mapping; + } + } + + return nullptr; +} + //------------------------------------------------------------------------ // compGetTieringName: get a string describing tiered compilation settings // for this method diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 5e05ff24cf1b28..5363beb0ea8e9f 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -522,6 +522,7 @@ class LclVarDsc unsigned char lvIsParam : 1; // is this a parameter? unsigned char lvIsRegArg : 1; // is this an argument that was passed by register? + unsigned char lvIsParamRegTarget : 1; // is this the target of a param reg to local mapping? unsigned char lvFramePointerBased : 1; // 0 = off of REG_SPBASE (e.g., ESP), 1 = off of REG_FPBASE (e.g., EBP) unsigned char lvOnFrame : 1; // (part of) the variable lives on the frame @@ -2580,6 +2581,24 @@ struct CloneTryInfo bool ScaleOriginalBlockProfile = false; }; +//------------------------------------------------------------------------ +// RegisterParameterLocalMapping: +// Contains mappings between a parameter register segment and a corresponding +// local. Used by the backend to know which locals are expected to contain +// which register parameters after the prolog. +// +struct RegisterParameterLocalMapping +{ + const ABIPassingSegment* RegisterSegment; + unsigned LclNum; + + RegisterParameterLocalMapping(const ABIPassingSegment* segment, unsigned lclNum) + : RegisterSegment(segment) + , LclNum(lclNum) + { + } +}; + /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX @@ -8291,8 +8310,14 @@ class Compiler bool rpMustCreateEBPFrame(INDEBUG(const char** wbReason)); private: - Lowering* m_pLowering; // Lowering; needed to Lower IR that's added or modified after Lowering. - LinearScanInterface* m_pLinearScan; // Linear Scan allocator + Lowering* m_pLowering = nullptr; // Lowering; needed to Lower IR that's added or modified after Lowering. + LinearScanInterface* m_pLinearScan = nullptr; // Linear Scan allocator + +public: + ArrayStack* m_regParamLocalMappings = nullptr; + + const RegisterParameterLocalMapping* FindParameterRegisterLocalMappingByRegister(regNumber reg); + const RegisterParameterLocalMapping* FindParameterRegisterLocalMappingByLocal(unsigned lclNum); /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX @@ -8307,7 +8332,6 @@ class Compiler XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */ -public: // Get handles void eeGetCallInfo(CORINFO_RESOLVED_TOKEN* pResolvedToken, diff --git a/src/coreclr/jit/liveness.cpp b/src/coreclr/jit/liveness.cpp index b66da476b05c72..5452fb626cd561 100644 --- a/src/coreclr/jit/liveness.cpp +++ b/src/coreclr/jit/liveness.cpp @@ -1941,7 +1941,7 @@ void Compiler::fgInterBlockLocalVarLiveness() // liveness of such locals will bubble to the top (fgFirstBB) // in fgInterBlockLocalVarLiveness() - if (!varDsc->lvIsParam && VarSetOps::IsMember(this, fgFirstBB->bbLiveIn, varDsc->lvVarIndex) && + if (!varDsc->lvIsParam && !varDsc->lvIsParamRegTarget && VarSetOps::IsMember(this, fgFirstBB->bbLiveIn, varDsc->lvVarIndex) && (info.compInitMem || varTypeIsGC(varDsc->TypeGet())) && !fieldOfDependentlyPromotedStruct) { varDsc->lvMustInit = true; @@ -1962,7 +1962,7 @@ void Compiler::fgInterBlockLocalVarLiveness() if (isFinallyVar) { // Set lvMustInit only if we have a non-arg, GC pointer. - if (!varDsc->lvIsParam && varTypeIsGC(varDsc->TypeGet())) + if (!varDsc->lvIsParam && !varDsc->lvIsParamRegTarget && varTypeIsGC(varDsc->TypeGet())) { varDsc->lvMustInit = true; } diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 63a03c60975853..4575872f111929 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7850,6 +7850,11 @@ PhaseStatus Lowering::DoPhase() LowerBlock(block); } + if (comp->opts.OptimizationEnabled()) + { + MapParameterRegisterLocals(); + } + #ifdef DEBUG JITDUMP("Lower has completed modifying nodes.\n"); if (VERBOSE) @@ -7902,6 +7907,180 @@ PhaseStatus Lowering::DoPhase() return PhaseStatus::MODIFIED_EVERYTHING; } +//------------------------------------------------------------------------ +// Lowering::MapParameterRegisterLocals: +// Create mappings between parameter registers and locals corresponding to +// them. +// +void Lowering::MapParameterRegisterLocals() +{ + comp->m_regParamLocalMappings = new (comp, CMK_ABI) ArrayStack(comp->getAllocator(CMK_ABI)); + + // Create initial mappings for parameters. + for (unsigned lclNum = 0; lclNum < comp->info.compArgsCount; lclNum++) + { + LclVarDsc* lclDsc = comp->lvaGetDesc(lclNum); + const ABIPassingInformation& abiInfo = comp->lvaGetParameterABIInfo(lclNum); + + if (abiInfo.HasAnyStackSegment()) + { + continue; + } + + if (lclDsc->lvPromoted) + { + if (comp->lvaGetPromotionType(lclDsc) != Compiler::PROMOTION_TYPE_INDEPENDENT) + { + continue; + } + + for (const ABIPassingSegment& segment : abiInfo.Segments()) + { + unsigned fieldLclNum = comp->lvaGetFieldLocal(lclDsc, segment.Offset); + assert(fieldLclNum != BAD_VAR_NUM); + assert(segment.Size == lclDsc->lvExactSize()); + comp->m_regParamLocalMappings->Emplace(&segment, fieldLclNum); + + LclVarDsc* fieldLclDsc = comp->lvaGetDesc(fieldLclNum); + assert(!fieldLclDsc->lvIsParamRegTarget); + fieldLclDsc->lvIsParamRegTarget = true; + } + } + else + { + if (!abiInfo.HasExactlyOneRegisterSegment()) + { + continue; + } + + const ABIPassingSegment& segment = abiInfo.Segment(0); + if (!lclDsc->lvDoNotEnregister) + { + assert((segment.Size == lclDsc->lvExactSize()) || + ((lclDsc->lvExactSize() < segment.Size) && lclDsc->lvNormalizeOnLoad())); + comp->m_regParamLocalMappings->Emplace(&segment, lclNum); + assert(!lclDsc->lvIsParamRegTarget); + lclDsc->lvIsParamRegTarget = true; + } + } + } + + JitHashTable, bool> storedToLocals(comp->getAllocator(CMK_ABI)); + // Now look for optimization opportunities in the first block: places where + // we read fields out of struct parameters that can be mapped cleanly. This + // is frequently created by physical promotion. + for (GenTree* node : LIR::AsRange(comp->fgFirstBB)) + { + if (!node->OperIs(GT_STORE_LCL_VAR)) + { + continue; + } + + GenTreeLclVarCommon* storeLcl = node->AsLclVarCommon(); + LclVarDsc* destLclDsc = comp->lvaGetDesc(storeLcl); + + storedToLocals.Emplace(storeLcl->GetLclNum(), true); + + if (destLclDsc->lvIsParamRegTarget) + { + continue; + } + + if (destLclDsc->TypeGet() == TYP_STRUCT) + { + continue; + } + + GenTree* data = storeLcl->Data(); + if (!data->OperIs(GT_LCL_FLD)) + { + continue; + } + + GenTreeLclFld* dataFld = data->AsLclFld(); + if (dataFld->GetLclNum() >= comp->info.compArgsCount) + { + continue; + } + + // If the store comes with some form of widening/narrowing then we + // can't remap (it would require creating a new properly sized local). + if (dataFld->TypeGet() != storeLcl->TypeGet()) + { + continue; + } + + if (storedToLocals.Lookup(dataFld->GetLclNum())) + { + // Source is not necessarily the parameter anymore (could be if the + // store happened between the LCL_FLD and STORE_LCL_VAR, but we do + // not handle that here) + continue; + } + + const ABIPassingInformation& dataAbiInfo = comp->lvaGetParameterABIInfo(dataFld->GetLclNum()); + const ABIPassingSegment* regSegment = nullptr; + for (const ABIPassingSegment& segment : dataAbiInfo.Segments()) + { + if (!segment.IsPassedInRegister()) + { + continue; + } + + if ((segment.Offset != dataFld->GetLclOffs()) || (segment.Size != genTypeSize(dataFld))) + { + continue; + } + + bool hasMappingAlready = + (comp->FindParameterRegisterLocalMappingByRegister(segment.GetRegister()) != nullptr) || + (comp->FindParameterRegisterLocalMappingByLocal(storeLcl->GetLclNum()) != nullptr); + + if (hasMappingAlready) + { + continue; + } + + JITDUMP("Store from an unenregisterable parameter induces parameter register remapping. Store:\n"); + DISPTREERANGE(LIR::AsRange(comp->fgFirstBB), node); + + comp->m_regParamLocalMappings->Emplace(&segment, storeLcl->GetLclNum()); + destLclDsc->lvIsParamRegTarget = true; + node->gtBashToNOP(); + dataFld->SetUnusedValue(); + + JITDUMP("New mapping: "); + DBEXEC(VERBOSE, segment.Dump()); + JITDUMP(" -> V%02u\n", storeLcl->GetLclNum()); + + GenTree* regParamValue = comp->gtNewLclvNode(storeLcl->GetLclNum(), dataFld->TypeGet()); + GenTree* storeField = comp->gtNewStoreLclFldNode(dataFld->GetLclNum(), dataFld->TypeGet(), segment.Offset, regParamValue); + + // Store actual parameter local from new reg local + LIR::AsRange(comp->fgFirstBB).InsertAtBeginning(LIR::SeqTree(comp, storeField)); + LowerNode(regParamValue); + LowerNode(storeField); + + JITDUMP("Parameter spill:\n"); + DISPTREERANGE(LIR::AsRange(comp->fgFirstBB), storeField); + } + } + +#ifdef DEBUG + if (comp->verbose) + { + printf("%d parameter register to local mappings\n", comp->m_regParamLocalMappings->Height()); + for (int i = 0; i < comp->m_regParamLocalMappings->Height(); i++) + { + const RegisterParameterLocalMapping& mapping = comp->m_regParamLocalMappings->BottomRef(i); + printf(" "); + mapping.RegisterSegment->Dump(); + printf(" -> V%02u\n", mapping.LclNum); + } + } +#endif +} + #ifdef DEBUG //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index 9aee0fd99a1209..659870c844cd73 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -132,6 +132,8 @@ class Lowering final : public Phase static bool CheckBlock(Compiler* compiler, BasicBlock* block); #endif // DEBUG + void MapParameterRegisterLocals(); + void LowerBlock(BasicBlock* block); GenTree* LowerNode(GenTree* node); diff --git a/src/coreclr/jit/lsra.h b/src/coreclr/jit/lsra.h index 9312eb46a52b24..d2b35184b81e57 100644 --- a/src/coreclr/jit/lsra.h +++ b/src/coreclr/jit/lsra.h @@ -1038,20 +1038,6 @@ class LinearScan : public LinearScanInterface Interval* lclVarInterval, LsraLocation currentLoc, GenTree* node, bool isUse, unsigned multiRegIdx); #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE -#if defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - // For AMD64 on SystemV machines. This method - // is called as replacement for raUpdateRegStateForArg - // that is used on Windows. On System V systems a struct can be passed - // partially using registers from the 2 register files. - // - // For LoongArch64's ABI, a struct can be passed - // partially using registers from the 2 register files. - void UpdateRegStateForStructArg(LclVarDsc* argDsc); -#endif // defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - - // Update reg state for an incoming register argument - void updateRegStateForArg(LclVarDsc* argDsc); - inline bool isCandidateLocalRef(GenTree* tree) { if (tree->IsLocal()) diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 56bba3469eb27b..55b37e71ee95b6 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2029,7 +2029,7 @@ void LinearScan::insertZeroInitRefPositions() while (iter.NextElem(&varIndex)) { LclVarDsc* varDsc = compiler->lvaGetDescByTrackedIndex(varIndex); - if (!varDsc->lvIsParam && isCandidateVar(varDsc)) + if (!varDsc->lvIsParam && !varDsc->lvIsParamRegTarget && isCandidateVar(varDsc)) { JITDUMP("V%02u was live in to first block:", compiler->lvaTrackedIndexToLclNum(varIndex)); Interval* interval = getIntervalForLocalVar(varIndex); @@ -2094,110 +2094,6 @@ void LinearScan::insertZeroInitRefPositions() } } -#if defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) -//------------------------------------------------------------------------ -// UpdateRegStateForStructArg: -// Sets the register state for an argument of type STRUCT. -// This is shared between with AMD64's SystemV systems and LoongArch64-ABI. -// -// Arguments: -// argDsc - the LclVarDsc for the argument of interest -// -// Notes: -// See Compiler::raUpdateRegStateForArg(RegState *regState, LclVarDsc *argDsc) in regalloc.cpp -// for how state for argument is updated for unix non-structs and Windows AMD64 structs. -// -void LinearScan::UpdateRegStateForStructArg(LclVarDsc* argDsc) -{ - assert(varTypeIsStruct(argDsc)); - RegState* intRegState = &compiler->codeGen->intRegState; - RegState* floatRegState = &compiler->codeGen->floatRegState; - - if ((argDsc->GetArgReg() != REG_STK) && (argDsc->GetArgReg() != REG_NA)) - { - if ((genRegMask(argDsc->GetArgReg()) & RBM_ALLFLOAT) != RBM_NONE) - { - assert((genRegMask(argDsc->GetArgReg()) & RBM_FLTARG_REGS) != RBM_NONE); - floatRegState->rsCalleeRegArgMaskLiveIn.AddRegNumInMask(argDsc->GetArgReg()); - } - else - { - assert((genRegMask(argDsc->GetArgReg()) & fullIntArgRegMask(compiler->info.compCallConv)) != RBM_NONE); - intRegState->rsCalleeRegArgMaskLiveIn.AddRegNumInMask(argDsc->GetArgReg()); - } - } - - if ((argDsc->GetOtherArgReg() != REG_STK) && (argDsc->GetOtherArgReg() != REG_NA)) - { - if (genRegMask(argDsc->GetOtherArgReg()) & (RBM_ALLFLOAT)) - { - assert(genRegMask(argDsc->GetOtherArgReg()) & (RBM_FLTARG_REGS)); - floatRegState->rsCalleeRegArgMaskLiveIn.AddRegNumInMask(argDsc->GetOtherArgReg()); - } - else - { - assert((genRegMask(argDsc->GetOtherArgReg()) & fullIntArgRegMask(compiler->info.compCallConv)) != RBM_NONE); - intRegState->rsCalleeRegArgMaskLiveIn.AddRegNumInMask(argDsc->GetOtherArgReg()); - } - } -} - -#endif // defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - -//------------------------------------------------------------------------ -// updateRegStateForArg: Updates rsCalleeRegArgMaskLiveIn for the appropriate -// regState (either compiler->intRegState or compiler->floatRegState), -// with the lvArgReg on "argDsc" -// -// Arguments: -// argDsc - the argument for which the state is to be updated. -// -// Return Value: None -// -// Assumptions: -// The argument is live on entry to the function -// (or is untracked and therefore assumed live) -// -void LinearScan::updateRegStateForArg(LclVarDsc* argDsc) -{ -#if defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - // For SystemV-AMD64 and LoongArch64 calls the argDsc - // can have 2 registers (for structs.). Handle them here. - if (varTypeIsStruct(argDsc)) - { - UpdateRegStateForStructArg(argDsc); - } - else -#endif // defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) - { - RegState* intRegState = &compiler->codeGen->intRegState; - RegState* floatRegState = &compiler->codeGen->floatRegState; - bool isFloat = emitter::isFloatReg(argDsc->GetArgReg()); - - if (argDsc->lvIsHfaRegArg()) - { - isFloat = true; - } - - if (isFloat) - { - JITDUMP("Float arg V%02u in reg %s\n", compiler->lvaGetLclNum(argDsc), getRegName(argDsc->GetArgReg())); - compiler->raUpdateRegStateForArg(floatRegState, argDsc); - } - else - { - JITDUMP("Int arg V%02u in reg %s\n", compiler->lvaGetLclNum(argDsc), getRegName(argDsc->GetArgReg())); -#if FEATURE_MULTIREG_ARGS - if (argDsc->GetOtherArgReg() != REG_NA) - { - JITDUMP("(second half) in reg %s\n", getRegName(argDsc->GetOtherArgReg())); - } -#endif // FEATURE_MULTIREG_ARGS - compiler->raUpdateRegStateForArg(intRegState, argDsc); - } - } -} - template void LinearScan::buildIntervals(); template void LinearScan::buildIntervals(); @@ -2279,36 +2175,36 @@ void LinearScan::buildIntervals() regsInUseThisLocation = RBM_NONE; regsInUseNextLocation = RBM_NONE; -#ifdef SWIFT_SUPPORT - if (compiler->info.compCallConv == CorInfoCallConvExtension::Swift) + for (unsigned lclNum = 0; lclNum < compiler->info.compArgsCount; lclNum++) { - for (unsigned lclNum = 0; lclNum < compiler->info.compArgsCount; lclNum++) + const ABIPassingInformation& abiInfo = compiler->lvaGetParameterABIInfo(lclNum); + for (const ABIPassingSegment& seg : abiInfo.Segments()) { - LclVarDsc* argDsc = compiler->lvaGetDesc(lclNum); - - if ((argDsc->lvRefCnt() == 0) && !compiler->opts.compDbgCode) + if (!seg.IsPassedInRegister()) { continue; } - const ABIPassingInformation& abiInfo = compiler->lvaGetParameterABIInfo(lclNum); - for (const ABIPassingSegment& seg : abiInfo.Segments()) + const RegisterParameterLocalMapping* mapping = compiler->FindParameterRegisterLocalMappingByRegister(seg.GetRegister()); + + LclVarDsc* argDsc = compiler->lvaGetDesc(mapping != nullptr ? mapping->LclNum : lclNum); + if (!compiler->compJmpOpUsed && (argDsc->lvRefCnt() == 0) && !compiler->opts.compDbgCode) { - if (seg.IsPassedInRegister()) - { - RegState* regState = genIsValidFloatReg(seg.GetRegister()) ? floatRegState : intRegState; - regState->rsCalleeRegArgMaskLiveIn |= seg.GetRegisterMask(); - } + // Not live + continue; } + + RegState* regState = genIsValidFloatReg(seg.GetRegister()) ? floatRegState : intRegState; + regState->rsCalleeRegArgMaskLiveIn |= seg.GetRegisterMask(); } } -#endif for (unsigned int varIndex = 0; varIndex < compiler->lvaTrackedCount; varIndex++) { - LclVarDsc* argDsc = compiler->lvaGetDescByTrackedIndex(varIndex); + unsigned lclNum = compiler->lvaTrackedIndexToLclNum(varIndex); + LclVarDsc* lclDsc = compiler->lvaGetDesc(lclNum); - if (!argDsc->lvIsParam) + if (!isCandidateVar(lclDsc)) { continue; } @@ -2319,70 +2215,37 @@ void LinearScan::buildIntervals() // Use lvRefCnt instead of checking bbLiveIn because if it's volatile we // won't have done dataflow on it, but it needs to be marked as live-in so // it will get saved in the prolog. - if (!compiler->compJmpOpUsed && argDsc->lvRefCnt() == 0 && !compiler->opts.compDbgCode) + if (!compiler->compJmpOpUsed && (lclDsc->lvRefCnt() == 0) && !compiler->opts.compDbgCode) { continue; } - if (argDsc->lvIsRegArg) - { - updateRegStateForArg(argDsc); - } - - if (isCandidateVar(argDsc)) - { - buildInitialParamDef(argDsc, argDsc->lvIsRegArg ? argDsc->GetArgReg() : REG_NA); - } - else if (argDsc->lvPromoted) + regNumber paramReg = REG_NA; + if (lclDsc->lvIsParamRegTarget) { - for (unsigned fieldVarNum = argDsc->lvFieldLclStart; - fieldVarNum < argDsc->lvFieldLclStart + argDsc->lvFieldCnt; ++fieldVarNum) - { - const LclVarDsc* fieldVarDsc = compiler->lvaGetDesc(fieldVarNum); - if (fieldVarDsc->lvLRACandidate) - { - assert(fieldVarDsc->lvTracked); - buildInitialParamDef(fieldVarDsc, REG_NA); - } - } + const RegisterParameterLocalMapping* mapping = compiler->FindParameterRegisterLocalMappingByLocal(lclNum); + assert(mapping != nullptr); + paramReg = mapping->RegisterSegment->GetRegister(); } - else + else if (lclDsc->lvIsParam) { - // We can overwrite the register (i.e. codegen saves it on entry) - assert(argDsc->lvRefCnt() == 0 || !argDsc->lvIsRegArg || argDsc->lvDoNotEnregister || - !argDsc->lvLRACandidate || (varTypeIsFloating(argDsc->TypeGet()) && compiler->opts.compDbgCode)); - } - } - - // Now set up the reg state for the non-tracked args - // (We do this here because we want to generate the ParamDef RefPositions in tracked - // order, so that loop doesn't hit the non-tracked args) - - for (unsigned argNum = 0; argNum < compiler->info.compArgsCount; argNum++) - { - LclVarDsc* argDsc = compiler->lvaGetDesc(argNum); + // This is a parameter that is a register candidate but not a + // parameter register target. It must be a stack parameter -- + // lowering ensures all potentially enregisterable register + // parameters are marked as lvIsParamRegTarget. +#ifdef DEBUG + unsigned abiLclNum = lclDsc->lvIsStructField ? lclDsc->lvParentLcl : lclNum; + assert(!compiler->lvaGetParameterABIInfo(abiLclNum).HasAnyRegisterSegment()); +#endif - if (argDsc->lvPromoted) - { - for (unsigned fieldVarNum = argDsc->lvFieldLclStart; - fieldVarNum < argDsc->lvFieldLclStart + argDsc->lvFieldCnt; ++fieldVarNum) - { - LclVarDsc* fieldVarDsc = compiler->lvaGetDesc(fieldVarNum); - noway_assert(fieldVarDsc->lvIsParam); - if (!fieldVarDsc->lvTracked && fieldVarDsc->lvIsRegArg) - { - updateRegStateForArg(fieldVarDsc); - } - } + // Fall through with paramReg = REG_NA } else { - noway_assert(argDsc->lvIsParam); - if (!argDsc->lvTracked && argDsc->lvIsRegArg) - { - updateRegStateForArg(argDsc); - } + continue; } + + buildInitialParamDef(lclDsc, paramReg); } // If there is a secret stub param, it is also live in From db15f945c0b006cc55cadfd05330e0c5f8710f50 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 17 Dec 2024 16:06:19 +0100 Subject: [PATCH 02/35] Remove optimization for now --- src/coreclr/jit/lower.cpp | 101 -------------------------------------- 1 file changed, 101 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 4575872f111929..f47d28500cc906 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7965,107 +7965,6 @@ void Lowering::MapParameterRegisterLocals() } } - JitHashTable, bool> storedToLocals(comp->getAllocator(CMK_ABI)); - // Now look for optimization opportunities in the first block: places where - // we read fields out of struct parameters that can be mapped cleanly. This - // is frequently created by physical promotion. - for (GenTree* node : LIR::AsRange(comp->fgFirstBB)) - { - if (!node->OperIs(GT_STORE_LCL_VAR)) - { - continue; - } - - GenTreeLclVarCommon* storeLcl = node->AsLclVarCommon(); - LclVarDsc* destLclDsc = comp->lvaGetDesc(storeLcl); - - storedToLocals.Emplace(storeLcl->GetLclNum(), true); - - if (destLclDsc->lvIsParamRegTarget) - { - continue; - } - - if (destLclDsc->TypeGet() == TYP_STRUCT) - { - continue; - } - - GenTree* data = storeLcl->Data(); - if (!data->OperIs(GT_LCL_FLD)) - { - continue; - } - - GenTreeLclFld* dataFld = data->AsLclFld(); - if (dataFld->GetLclNum() >= comp->info.compArgsCount) - { - continue; - } - - // If the store comes with some form of widening/narrowing then we - // can't remap (it would require creating a new properly sized local). - if (dataFld->TypeGet() != storeLcl->TypeGet()) - { - continue; - } - - if (storedToLocals.Lookup(dataFld->GetLclNum())) - { - // Source is not necessarily the parameter anymore (could be if the - // store happened between the LCL_FLD and STORE_LCL_VAR, but we do - // not handle that here) - continue; - } - - const ABIPassingInformation& dataAbiInfo = comp->lvaGetParameterABIInfo(dataFld->GetLclNum()); - const ABIPassingSegment* regSegment = nullptr; - for (const ABIPassingSegment& segment : dataAbiInfo.Segments()) - { - if (!segment.IsPassedInRegister()) - { - continue; - } - - if ((segment.Offset != dataFld->GetLclOffs()) || (segment.Size != genTypeSize(dataFld))) - { - continue; - } - - bool hasMappingAlready = - (comp->FindParameterRegisterLocalMappingByRegister(segment.GetRegister()) != nullptr) || - (comp->FindParameterRegisterLocalMappingByLocal(storeLcl->GetLclNum()) != nullptr); - - if (hasMappingAlready) - { - continue; - } - - JITDUMP("Store from an unenregisterable parameter induces parameter register remapping. Store:\n"); - DISPTREERANGE(LIR::AsRange(comp->fgFirstBB), node); - - comp->m_regParamLocalMappings->Emplace(&segment, storeLcl->GetLclNum()); - destLclDsc->lvIsParamRegTarget = true; - node->gtBashToNOP(); - dataFld->SetUnusedValue(); - - JITDUMP("New mapping: "); - DBEXEC(VERBOSE, segment.Dump()); - JITDUMP(" -> V%02u\n", storeLcl->GetLclNum()); - - GenTree* regParamValue = comp->gtNewLclvNode(storeLcl->GetLclNum(), dataFld->TypeGet()); - GenTree* storeField = comp->gtNewStoreLclFldNode(dataFld->GetLclNum(), dataFld->TypeGet(), segment.Offset, regParamValue); - - // Store actual parameter local from new reg local - LIR::AsRange(comp->fgFirstBB).InsertAtBeginning(LIR::SeqTree(comp, storeField)); - LowerNode(regParamValue); - LowerNode(storeField); - - JITDUMP("Parameter spill:\n"); - DISPTREERANGE(LIR::AsRange(comp->fgFirstBB), storeField); - } - } - #ifdef DEBUG if (comp->verbose) { From 8e8c913b9e2cd004992d196c9fa99c8d293e303f Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Tue, 17 Dec 2024 16:16:59 +0100 Subject: [PATCH 03/35] Add an lvTracked check, remove dead code --- src/coreclr/jit/compiler.h | 2 - src/coreclr/jit/lsrabuild.cpp | 2 +- src/coreclr/jit/regalloc.cpp | 74 ----------------------------------- 3 files changed, 1 insertion(+), 77 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 5363beb0ea8e9f..aefc67d8ddc420 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -8277,8 +8277,6 @@ class Compiler */ public: - regNumber raUpdateRegStateForArg(RegState* regState, LclVarDsc* argDsc); - void raMarkStkVars(); #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 55b37e71ee95b6..3f99bdb2c21aa3 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2188,7 +2188,7 @@ void LinearScan::buildIntervals() const RegisterParameterLocalMapping* mapping = compiler->FindParameterRegisterLocalMappingByRegister(seg.GetRegister()); LclVarDsc* argDsc = compiler->lvaGetDesc(mapping != nullptr ? mapping->LclNum : lclNum); - if (!compiler->compJmpOpUsed && (argDsc->lvRefCnt() == 0) && !compiler->opts.compDbgCode) + if (argDsc->lvTracked && !compiler->compJmpOpUsed && (argDsc->lvRefCnt() == 0) && !compiler->opts.compDbgCode) { // Not live continue; diff --git a/src/coreclr/jit/regalloc.cpp b/src/coreclr/jit/regalloc.cpp index ba07086a63c212..1de4dd8bd66992 100644 --- a/src/coreclr/jit/regalloc.cpp +++ b/src/coreclr/jit/regalloc.cpp @@ -95,80 +95,6 @@ bool Compiler::shouldDoubleAlign( } #endif // DOUBLE_ALIGN -// The code to set the regState for each arg is outlined for shared use -// by linear scan. (It is not shared for System V AMD64 platform.) -regNumber Compiler::raUpdateRegStateForArg(RegState* regState, LclVarDsc* argDsc) -{ - regNumber inArgReg = argDsc->GetArgReg(); - regMaskTP inArgMask = genRegMask(inArgReg); - - if (regState->rsIsFloat) - { - assert((inArgMask & RBM_FLTARG_REGS) != RBM_NONE); - } - else - { - assert((inArgMask & fullIntArgRegMask(info.compCallConv)) != RBM_NONE); - } - - regState->rsCalleeRegArgMaskLiveIn |= inArgMask; - -#ifdef TARGET_ARM - if (argDsc->lvType == TYP_DOUBLE) - { - if (info.compIsVarArgs || opts.compUseSoftFP) - { - assert((inArgReg == REG_R0) || (inArgReg == REG_R2)); - assert(!regState->rsIsFloat); - } - else - { - assert(regState->rsIsFloat); - assert(emitter::isDoubleReg(inArgReg)); - } - regState->rsCalleeRegArgMaskLiveIn |= genRegMask((regNumber)(inArgReg + 1)); - } - else if (argDsc->lvType == TYP_LONG) - { - assert((inArgReg == REG_R0) || (inArgReg == REG_R2)); - assert(!regState->rsIsFloat); - regState->rsCalleeRegArgMaskLiveIn |= genRegMask((regNumber)(inArgReg + 1)); - } -#endif // TARGET_ARM - -#if FEATURE_MULTIREG_ARGS - if (varTypeIsStruct(argDsc->lvType)) - { - if (argDsc->lvIsHfaRegArg()) - { - assert(regState->rsIsFloat); - unsigned cSlots = argDsc->lvHfaSlots(); - for (unsigned i = 1; i < cSlots; i++) - { - assert(inArgReg + i <= LAST_FP_ARGREG); - regState->rsCalleeRegArgMaskLiveIn |= genRegMask(static_cast(inArgReg + i)); - } - } - else - { - assert(!regState->rsIsFloat); - unsigned cSlots = argDsc->lvSize() / TARGET_POINTER_SIZE; - for (unsigned i = 1; i < cSlots; i++) - { - regNumber nextArgReg = (regNumber)(inArgReg + i); - if (nextArgReg > REG_ARG_LAST) - { - break; - } - regState->rsCalleeRegArgMaskLiveIn |= genRegMask(nextArgReg); - } - } - } -#endif // FEATURE_MULTIREG_ARGS - - return inArgReg; -} - //------------------------------------------------------------------------ // rpMustCreateEBPFrame: // Returns true when we must create an EBP frame From e504b4bf259554ea4e683a537ec51eda82110d56 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 18 Dec 2024 00:58:02 +0100 Subject: [PATCH 04/35] Support insertions for now --- src/coreclr/jit/compiler.cpp | 8 ++++-- src/coreclr/jit/compiler.h | 14 +++++++-- src/coreclr/jit/lower.cpp | 53 +++++++++++++++-------------------- src/coreclr/jit/lsrabuild.cpp | 33 +++++++++++++++------- 4 files changed, 63 insertions(+), 45 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 42cfc101e934b3..b73cab83bfb1f8 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4253,10 +4253,14 @@ const RegisterParameterLocalMapping* Compiler::FindParameterRegisterLocalMapping // Try to find a mapping that maps a particular local from an incoming // parameter register. // +// Parameters: +// lclNum - The local to find a mapping for +// offset - The offset that the mapping maps to in the local +// // Returns: // The mapping, or nullptr if no mapping was found for this local. // -const RegisterParameterLocalMapping* Compiler::FindParameterRegisterLocalMappingByLocal(unsigned lclNum) +const RegisterParameterLocalMapping* Compiler::FindParameterRegisterLocalMappingByLocal(unsigned lclNum, unsigned offset) { if (m_regParamLocalMappings == nullptr) { @@ -4266,7 +4270,7 @@ const RegisterParameterLocalMapping* Compiler::FindParameterRegisterLocalMapping for (int i = 0; i < m_regParamLocalMappings->Height(); i++) { const RegisterParameterLocalMapping& mapping = m_regParamLocalMappings->BottomRef(i); - if (mapping.LclNum == lclNum) + if ((mapping.LclNum == lclNum) && (mapping.Offset == offset)) { return &mapping; } diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index aefc67d8ddc420..67067098307d94 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2591,10 +2591,18 @@ struct RegisterParameterLocalMapping { const ABIPassingSegment* RegisterSegment; unsigned LclNum; - - RegisterParameterLocalMapping(const ABIPassingSegment* segment, unsigned lclNum) + // Offset at which the register is inserted into the local. Used e.g. for + // HFAs on arm64 that might have been promoted as a single local (e.g. + // System.Numerics.Plane is passed in 3 float regs but enregistered as + // TYP_SIMD12). + // SysV 64 also see similar situations e.g. Vector3 being passed in + // xmm0[0..8), xmm1[8..12), but enregistered as one register. + unsigned Offset; + + RegisterParameterLocalMapping(const ABIPassingSegment* segment, unsigned lclNum, unsigned offset) : RegisterSegment(segment) , LclNum(lclNum) + , Offset(offset) { } }; @@ -8315,7 +8323,7 @@ class Compiler ArrayStack* m_regParamLocalMappings = nullptr; const RegisterParameterLocalMapping* FindParameterRegisterLocalMappingByRegister(regNumber reg); - const RegisterParameterLocalMapping* FindParameterRegisterLocalMappingByLocal(unsigned lclNum); + const RegisterParameterLocalMapping* FindParameterRegisterLocalMappingByLocal(unsigned lclNum, unsigned offset); /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index f47d28500cc906..a7ce8170923682 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7916,7 +7916,7 @@ void Lowering::MapParameterRegisterLocals() { comp->m_regParamLocalMappings = new (comp, CMK_ABI) ArrayStack(comp->getAllocator(CMK_ABI)); - // Create initial mappings for parameters. + // Create initial mappings for promotions. for (unsigned lclNum = 0; lclNum < comp->info.compArgsCount; lclNum++) { LclVarDsc* lclDsc = comp->lvaGetDesc(lclNum); @@ -7927,41 +7927,34 @@ void Lowering::MapParameterRegisterLocals() continue; } - if (lclDsc->lvPromoted) + if (comp->lvaGetPromotionType(lclDsc) != Compiler::PROMOTION_TYPE_INDEPENDENT) { - if (comp->lvaGetPromotionType(lclDsc) != Compiler::PROMOTION_TYPE_INDEPENDENT) - { - continue; - } - - for (const ABIPassingSegment& segment : abiInfo.Segments()) - { - unsigned fieldLclNum = comp->lvaGetFieldLocal(lclDsc, segment.Offset); - assert(fieldLclNum != BAD_VAR_NUM); - assert(segment.Size == lclDsc->lvExactSize()); - comp->m_regParamLocalMappings->Emplace(&segment, fieldLclNum); - - LclVarDsc* fieldLclDsc = comp->lvaGetDesc(fieldLclNum); - assert(!fieldLclDsc->lvIsParamRegTarget); - fieldLclDsc->lvIsParamRegTarget = true; - } + continue; } - else + + for (int i = 0; i < lclDsc->lvFieldCnt; i++) { - if (!abiInfo.HasExactlyOneRegisterSegment()) - { - continue; - } + unsigned fieldLclNum = lclDsc->lvFieldLclStart + i; + LclVarDsc* fieldDsc = comp->lvaGetDesc(fieldLclNum); - const ABIPassingSegment& segment = abiInfo.Segment(0); - if (!lclDsc->lvDoNotEnregister) + for (const ABIPassingSegment& segment : abiInfo.Segments()) { - assert((segment.Size == lclDsc->lvExactSize()) || - ((lclDsc->lvExactSize() < segment.Size) && lclDsc->lvNormalizeOnLoad())); - comp->m_regParamLocalMappings->Emplace(&segment, lclNum); - assert(!lclDsc->lvIsParamRegTarget); - lclDsc->lvIsParamRegTarget = true; + if (segment.Offset + segment.Size <= fieldDsc->lvFldOffset) + { + continue; + } + + if (fieldDsc->lvFldOffset + fieldDsc->lvExactSize() <= segment.Offset) + { + continue; + } + + comp->m_regParamLocalMappings->Emplace(&segment, fieldLclNum, segment.Offset - fieldDsc->lvFldOffset); } + + LclVarDsc* fieldLclDsc = comp->lvaGetDesc(fieldLclNum); + assert(!fieldLclDsc->lvIsParamRegTarget); + fieldLclDsc->lvIsParamRegTarget = true; } } diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 3f99bdb2c21aa3..f542f245f4b3b1 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2223,22 +2223,35 @@ void LinearScan::buildIntervals() regNumber paramReg = REG_NA; if (lclDsc->lvIsParamRegTarget) { - const RegisterParameterLocalMapping* mapping = compiler->FindParameterRegisterLocalMappingByLocal(lclNum); + const RegisterParameterLocalMapping* mapping = compiler->FindParameterRegisterLocalMappingByLocal(lclNum, 0); assert(mapping != nullptr); paramReg = mapping->RegisterSegment->GetRegister(); } else if (lclDsc->lvIsParam) { - // This is a parameter that is a register candidate but not a - // parameter register target. It must be a stack parameter -- - // lowering ensures all potentially enregisterable register - // parameters are marked as lvIsParamRegTarget. -#ifdef DEBUG - unsigned abiLclNum = lclDsc->lvIsStructField ? lclDsc->lvParentLcl : lclNum; - assert(!compiler->lvaGetParameterABIInfo(abiLclNum).HasAnyRegisterSegment()); -#endif + if (lclDsc->lvIsStructField) + { + // All promoted fields should be assigned via the + // lvIsParamRegTarget mechanism, so this must be a stack + // argument. + assert(!compiler->lvaGetParameterABIInfo(lclDsc->lvParentLcl).HasAnyRegisterSegment()); - // Fall through with paramReg = REG_NA + // Fall through with paramReg == REG_NA + } + else + { + // Enregisterable parameter, may or may not be a stack arg. + // Prefer the first register if there is one. + const ABIPassingInformation& abiInfo = compiler->lvaGetParameterABIInfo(lclNum); + for (const ABIPassingSegment& seg : abiInfo.Segments()) + { + if (seg.IsPassedInRegister()) + { + paramReg = seg.GetRegister(); + break; + } + } + } } else { From bf196bd0ca57d1ec65c5557b51eeec8169681d36 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 18 Dec 2024 00:58:33 +0100 Subject: [PATCH 05/35] Run jit-format --- src/coreclr/jit/codegencommon.cpp | 9 +++++---- src/coreclr/jit/compiler.cpp | 3 ++- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/liveness.cpp | 3 ++- src/coreclr/jit/lower.cpp | 9 +++++---- src/coreclr/jit/lsrabuild.cpp | 11 +++++++---- 6 files changed, 22 insertions(+), 15 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index eb9a40aa095094..fa61ae5087e312 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -3258,8 +3258,8 @@ void CodeGen::genSpillOrAddRegisterParam(unsigned lclNum, const ABIPassingSegmen if (varDsc->lvOnFrame && (!varDsc->lvIsInReg() || varDsc->lvLiveInOutOfHndlr)) { - unsigned paramLclNum = varDsc->lvIsStructField ? varDsc->lvParentLcl : lclNum; - LclVarDsc* paramVarDsc = compiler->lvaGetDesc(paramLclNum); + unsigned paramLclNum = varDsc->lvIsStructField ? varDsc->lvParentLcl : lclNum; + LclVarDsc* paramVarDsc = compiler->lvaGetDesc(paramLclNum); var_types storeType = genParamStackType(paramVarDsc, segment); if ((varDsc->TypeGet() != TYP_STRUCT) && (genTypeSize(genActualType(varDsc)) < genTypeSize(storeType))) @@ -3396,7 +3396,7 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) for (unsigned lclNum = 0; lclNum < compiler->info.compArgsCount; lclNum++) { - LclVarDsc* lclDsc = compiler->lvaGetDesc(lclNum); + LclVarDsc* lclDsc = compiler->lvaGetDesc(lclNum); const ABIPassingInformation& abiInfo = compiler->lvaGetParameterABIInfo(lclNum); for (const ABIPassingSegment& segment : abiInfo.Segments()) @@ -3406,7 +3406,8 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) continue; } - const RegisterParameterLocalMapping* mapping = compiler->FindParameterRegisterLocalMappingByRegister(segment.GetRegister()); + const RegisterParameterLocalMapping* mapping = + compiler->FindParameterRegisterLocalMappingByRegister(segment.GetRegister()); unsigned fieldLclNum = mapping != nullptr ? mapping->LclNum : lclNum; genSpillOrAddRegisterParam(fieldLclNum, segment, &graph); diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index b73cab83bfb1f8..e1aba64032ceca 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4260,7 +4260,8 @@ const RegisterParameterLocalMapping* Compiler::FindParameterRegisterLocalMapping // Returns: // The mapping, or nullptr if no mapping was found for this local. // -const RegisterParameterLocalMapping* Compiler::FindParameterRegisterLocalMappingByLocal(unsigned lclNum, unsigned offset) +const RegisterParameterLocalMapping* Compiler::FindParameterRegisterLocalMappingByLocal(unsigned lclNum, + unsigned offset) { if (m_regParamLocalMappings == nullptr) { diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 67067098307d94..84cc274c15b6e8 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -8316,7 +8316,7 @@ class Compiler bool rpMustCreateEBPFrame(INDEBUG(const char** wbReason)); private: - Lowering* m_pLowering = nullptr; // Lowering; needed to Lower IR that's added or modified after Lowering. + Lowering* m_pLowering = nullptr; // Lowering; needed to Lower IR that's added or modified after Lowering. LinearScanInterface* m_pLinearScan = nullptr; // Linear Scan allocator public: diff --git a/src/coreclr/jit/liveness.cpp b/src/coreclr/jit/liveness.cpp index 5452fb626cd561..726b4a5e3b2763 100644 --- a/src/coreclr/jit/liveness.cpp +++ b/src/coreclr/jit/liveness.cpp @@ -1941,7 +1941,8 @@ void Compiler::fgInterBlockLocalVarLiveness() // liveness of such locals will bubble to the top (fgFirstBB) // in fgInterBlockLocalVarLiveness() - if (!varDsc->lvIsParam && !varDsc->lvIsParamRegTarget && VarSetOps::IsMember(this, fgFirstBB->bbLiveIn, varDsc->lvVarIndex) && + if (!varDsc->lvIsParam && !varDsc->lvIsParamRegTarget && + VarSetOps::IsMember(this, fgFirstBB->bbLiveIn, varDsc->lvVarIndex) && (info.compInitMem || varTypeIsGC(varDsc->TypeGet())) && !fieldOfDependentlyPromotedStruct) { varDsc->lvMustInit = true; diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index a7ce8170923682..7ad716fe79c5a1 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7914,12 +7914,13 @@ PhaseStatus Lowering::DoPhase() // void Lowering::MapParameterRegisterLocals() { - comp->m_regParamLocalMappings = new (comp, CMK_ABI) ArrayStack(comp->getAllocator(CMK_ABI)); + comp->m_regParamLocalMappings = + new (comp, CMK_ABI) ArrayStack(comp->getAllocator(CMK_ABI)); // Create initial mappings for promotions. for (unsigned lclNum = 0; lclNum < comp->info.compArgsCount; lclNum++) { - LclVarDsc* lclDsc = comp->lvaGetDesc(lclNum); + LclVarDsc* lclDsc = comp->lvaGetDesc(lclNum); const ABIPassingInformation& abiInfo = comp->lvaGetParameterABIInfo(lclNum); if (abiInfo.HasAnyStackSegment()) @@ -7934,8 +7935,8 @@ void Lowering::MapParameterRegisterLocals() for (int i = 0; i < lclDsc->lvFieldCnt; i++) { - unsigned fieldLclNum = lclDsc->lvFieldLclStart + i; - LclVarDsc* fieldDsc = comp->lvaGetDesc(fieldLclNum); + unsigned fieldLclNum = lclDsc->lvFieldLclStart + i; + LclVarDsc* fieldDsc = comp->lvaGetDesc(fieldLclNum); for (const ABIPassingSegment& segment : abiInfo.Segments()) { diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index f542f245f4b3b1..b74dbd824e7489 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2185,10 +2185,12 @@ void LinearScan::buildIntervals() continue; } - const RegisterParameterLocalMapping* mapping = compiler->FindParameterRegisterLocalMappingByRegister(seg.GetRegister()); + const RegisterParameterLocalMapping* mapping = + compiler->FindParameterRegisterLocalMappingByRegister(seg.GetRegister()); LclVarDsc* argDsc = compiler->lvaGetDesc(mapping != nullptr ? mapping->LclNum : lclNum); - if (argDsc->lvTracked && !compiler->compJmpOpUsed && (argDsc->lvRefCnt() == 0) && !compiler->opts.compDbgCode) + if (argDsc->lvTracked && !compiler->compJmpOpUsed && (argDsc->lvRefCnt() == 0) && + !compiler->opts.compDbgCode) { // Not live continue; @@ -2201,7 +2203,7 @@ void LinearScan::buildIntervals() for (unsigned int varIndex = 0; varIndex < compiler->lvaTrackedCount; varIndex++) { - unsigned lclNum = compiler->lvaTrackedIndexToLclNum(varIndex); + unsigned lclNum = compiler->lvaTrackedIndexToLclNum(varIndex); LclVarDsc* lclDsc = compiler->lvaGetDesc(lclNum); if (!isCandidateVar(lclDsc)) @@ -2223,7 +2225,8 @@ void LinearScan::buildIntervals() regNumber paramReg = REG_NA; if (lclDsc->lvIsParamRegTarget) { - const RegisterParameterLocalMapping* mapping = compiler->FindParameterRegisterLocalMappingByLocal(lclNum, 0); + const RegisterParameterLocalMapping* mapping = + compiler->FindParameterRegisterLocalMappingByLocal(lclNum, 0); assert(mapping != nullptr); paramReg = mapping->RegisterSegment->GetRegister(); } From 559baf06bcfa1010fd6c0adca00c6855016f58f0 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 18 Dec 2024 12:16:42 +0100 Subject: [PATCH 06/35] Avoid homing Swift parameters if they are !lvOnFrame --- src/coreclr/jit/codegencommon.cpp | 9 ++++++--- src/coreclr/jit/lsrabuild.cpp | 4 +++- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index fa61ae5087e312..e941bdd8859121 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -3374,7 +3374,8 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) } } - if (compiler->info.compPublishStubParam && ((paramRegs & RBM_SECRET_STUB_PARAM) != RBM_NONE)) + if (compiler->info.compPublishStubParam && ((paramRegs & RBM_SECRET_STUB_PARAM) != RBM_NONE) && + compiler->lvaGetDesc(compiler->lvaStubArgumentVar)->lvOnFrame) { GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SECRET_STUB_PARAM, compiler->lvaStubArgumentVar, 0); @@ -5701,14 +5702,16 @@ void CodeGen::genFnProlog() if (compiler->info.compCallConv == CorInfoCallConvExtension::Swift) { if ((compiler->lvaSwiftSelfArg != BAD_VAR_NUM) && - ((intRegState.rsCalleeRegArgMaskLiveIn & RBM_SWIFT_SELF) != 0)) + ((intRegState.rsCalleeRegArgMaskLiveIn & RBM_SWIFT_SELF) != 0) && + compiler->lvaGetDesc(compiler->lvaSwiftSelfArg)->lvOnFrame) { GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SWIFT_SELF, compiler->lvaSwiftSelfArg, 0); intRegState.rsCalleeRegArgMaskLiveIn &= ~RBM_SWIFT_SELF; } if ((compiler->lvaSwiftIndirectResultArg != BAD_VAR_NUM) && - ((intRegState.rsCalleeRegArgMaskLiveIn & theFixedRetBuffMask(CorInfoCallConvExtension::Swift)) != 0)) + ((intRegState.rsCalleeRegArgMaskLiveIn & theFixedRetBuffMask(CorInfoCallConvExtension::Swift)) != 0) && + compiler->lvaGetDesc(compiler->lvaSwiftIndirectResultArg)->lvOnFrame) { GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, theFixedRetBuffReg(CorInfoCallConvExtension::Swift), diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index b74dbd824e7489..0b5691f72f30c0 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2188,7 +2188,9 @@ void LinearScan::buildIntervals() const RegisterParameterLocalMapping* mapping = compiler->FindParameterRegisterLocalMappingByRegister(seg.GetRegister()); - LclVarDsc* argDsc = compiler->lvaGetDesc(mapping != nullptr ? mapping->LclNum : lclNum); + unsigned mappedLclNum = mapping != nullptr ? mapping->LclNum : lclNum; + JITDUMP("Arg V%02u in reg %s\n", mappedLclNum, getRegName(seg.GetRegister())); + LclVarDsc* argDsc = compiler->lvaGetDesc(mappedLclNum); if (argDsc->lvTracked && !compiler->compJmpOpUsed && (argDsc->lvRefCnt() == 0) && !compiler->opts.compDbgCode) { From 6d1b57ebc126310b2a958d4cfa0d2b680f17d0e9 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 18 Dec 2024 13:11:58 +0100 Subject: [PATCH 07/35] Rename --- src/coreclr/jit/codegencommon.cpp | 2 +- src/coreclr/jit/compiler.cpp | 16 ++++++++-------- src/coreclr/jit/compiler.h | 12 ++++++------ src/coreclr/jit/lower.cpp | 12 ++++++------ src/coreclr/jit/lsrabuild.cpp | 4 ++-- 5 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index e941bdd8859121..b6555f4bb11861 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -3407,7 +3407,7 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) continue; } - const RegisterParameterLocalMapping* mapping = + const ParameterRegisterLocalMapping* mapping = compiler->FindParameterRegisterLocalMappingByRegister(segment.GetRegister()); unsigned fieldLclNum = mapping != nullptr ? mapping->LclNum : lclNum; diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index e1aba64032ceca..9ec9c84c785bd9 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4229,16 +4229,16 @@ bool Compiler::compRsvdRegCheck(FrameLayoutState curState) // Returns: // The mapping, or nullptr if no mapping was found for this register. // -const RegisterParameterLocalMapping* Compiler::FindParameterRegisterLocalMappingByRegister(regNumber reg) +const ParameterRegisterLocalMapping* Compiler::FindParameterRegisterLocalMappingByRegister(regNumber reg) { - if (m_regParamLocalMappings == nullptr) + if (m_paramRegLocalMappings == nullptr) { return nullptr; } - for (int i = 0; i < m_regParamLocalMappings->Height(); i++) + for (int i = 0; i < m_paramRegLocalMappings->Height(); i++) { - const RegisterParameterLocalMapping& mapping = m_regParamLocalMappings->BottomRef(i); + const ParameterRegisterLocalMapping& mapping = m_paramRegLocalMappings->BottomRef(i); if (mapping.RegisterSegment->GetRegister() == reg) { return &mapping; @@ -4260,17 +4260,17 @@ const RegisterParameterLocalMapping* Compiler::FindParameterRegisterLocalMapping // Returns: // The mapping, or nullptr if no mapping was found for this local. // -const RegisterParameterLocalMapping* Compiler::FindParameterRegisterLocalMappingByLocal(unsigned lclNum, +const ParameterRegisterLocalMapping* Compiler::FindParameterRegisterLocalMappingByLocal(unsigned lclNum, unsigned offset) { - if (m_regParamLocalMappings == nullptr) + if (m_paramRegLocalMappings == nullptr) { return nullptr; } - for (int i = 0; i < m_regParamLocalMappings->Height(); i++) + for (int i = 0; i < m_paramRegLocalMappings->Height(); i++) { - const RegisterParameterLocalMapping& mapping = m_regParamLocalMappings->BottomRef(i); + const ParameterRegisterLocalMapping& mapping = m_paramRegLocalMappings->BottomRef(i); if ((mapping.LclNum == lclNum) && (mapping.Offset == offset)) { return &mapping; diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 84cc274c15b6e8..2622a2c07a5e6f 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -2582,12 +2582,12 @@ struct CloneTryInfo }; //------------------------------------------------------------------------ -// RegisterParameterLocalMapping: +// ParameterRegisterLocalMapping: // Contains mappings between a parameter register segment and a corresponding // local. Used by the backend to know which locals are expected to contain // which register parameters after the prolog. // -struct RegisterParameterLocalMapping +struct ParameterRegisterLocalMapping { const ABIPassingSegment* RegisterSegment; unsigned LclNum; @@ -2599,7 +2599,7 @@ struct RegisterParameterLocalMapping // xmm0[0..8), xmm1[8..12), but enregistered as one register. unsigned Offset; - RegisterParameterLocalMapping(const ABIPassingSegment* segment, unsigned lclNum, unsigned offset) + ParameterRegisterLocalMapping(const ABIPassingSegment* segment, unsigned lclNum, unsigned offset) : RegisterSegment(segment) , LclNum(lclNum) , Offset(offset) @@ -8320,10 +8320,10 @@ class Compiler LinearScanInterface* m_pLinearScan = nullptr; // Linear Scan allocator public: - ArrayStack* m_regParamLocalMappings = nullptr; + ArrayStack* m_paramRegLocalMappings = nullptr; - const RegisterParameterLocalMapping* FindParameterRegisterLocalMappingByRegister(regNumber reg); - const RegisterParameterLocalMapping* FindParameterRegisterLocalMappingByLocal(unsigned lclNum, unsigned offset); + const ParameterRegisterLocalMapping* FindParameterRegisterLocalMappingByRegister(regNumber reg); + const ParameterRegisterLocalMapping* FindParameterRegisterLocalMappingByLocal(unsigned lclNum, unsigned offset); /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 7ad716fe79c5a1..8b7b3dd890cd88 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7914,8 +7914,8 @@ PhaseStatus Lowering::DoPhase() // void Lowering::MapParameterRegisterLocals() { - comp->m_regParamLocalMappings = - new (comp, CMK_ABI) ArrayStack(comp->getAllocator(CMK_ABI)); + comp->m_paramRegLocalMappings = + new (comp, CMK_ABI) ArrayStack(comp->getAllocator(CMK_ABI)); // Create initial mappings for promotions. for (unsigned lclNum = 0; lclNum < comp->info.compArgsCount; lclNum++) @@ -7950,7 +7950,7 @@ void Lowering::MapParameterRegisterLocals() continue; } - comp->m_regParamLocalMappings->Emplace(&segment, fieldLclNum, segment.Offset - fieldDsc->lvFldOffset); + comp->m_paramRegLocalMappings->Emplace(&segment, fieldLclNum, segment.Offset - fieldDsc->lvFldOffset); } LclVarDsc* fieldLclDsc = comp->lvaGetDesc(fieldLclNum); @@ -7962,10 +7962,10 @@ void Lowering::MapParameterRegisterLocals() #ifdef DEBUG if (comp->verbose) { - printf("%d parameter register to local mappings\n", comp->m_regParamLocalMappings->Height()); - for (int i = 0; i < comp->m_regParamLocalMappings->Height(); i++) + printf("%d parameter register to local mappings\n", comp->m_paramRegLocalMappings->Height()); + for (int i = 0; i < comp->m_paramRegLocalMappings->Height(); i++) { - const RegisterParameterLocalMapping& mapping = comp->m_regParamLocalMappings->BottomRef(i); + const ParameterRegisterLocalMapping& mapping = comp->m_paramRegLocalMappings->BottomRef(i); printf(" "); mapping.RegisterSegment->Dump(); printf(" -> V%02u\n", mapping.LclNum); diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 0b5691f72f30c0..4cf182f374ebbd 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2185,7 +2185,7 @@ void LinearScan::buildIntervals() continue; } - const RegisterParameterLocalMapping* mapping = + const ParameterRegisterLocalMapping* mapping = compiler->FindParameterRegisterLocalMappingByRegister(seg.GetRegister()); unsigned mappedLclNum = mapping != nullptr ? mapping->LclNum : lclNum; @@ -2227,7 +2227,7 @@ void LinearScan::buildIntervals() regNumber paramReg = REG_NA; if (lclDsc->lvIsParamRegTarget) { - const RegisterParameterLocalMapping* mapping = + const ParameterRegisterLocalMapping* mapping = compiler->FindParameterRegisterLocalMappingByLocal(lclNum, 0); assert(mapping != nullptr); paramReg = mapping->RegisterSegment->GetRegister(); From 058e8dcd86bd487ef722590ff649dba708935164 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 18 Dec 2024 19:38:39 +0100 Subject: [PATCH 08/35] JIT: Optimize struct parameter register accesses in the backend This PR adds an optimization in lowering to utilize the new parameter register to local mappings added in #110795. The optimization detects IR that is going to result in stack spills/loads and instead replaces them with scalar locals that will be able to stay in registers. Physical promotion benefits especially from this as it creates the kind of IR that the optimization ends up kicking in for. The heuristics of physical promotion are updated to account for the fact that the backend is now able to do this optimization, making physical promotion more likely to promote struct parameters. --- src/coreclr/jit/codegencommon.cpp | 4 + src/coreclr/jit/compiler.hpp | 2 +- src/coreclr/jit/lclvars.cpp | 10 +- src/coreclr/jit/lower.cpp | 190 ++++++++++++++++++++++++++++++ src/coreclr/jit/lower.h | 4 + src/coreclr/jit/lsra.cpp | 2 +- src/coreclr/jit/lsrabuild.cpp | 2 +- src/coreclr/jit/promotion.cpp | 67 ++++++++++- 8 files changed, 274 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index b6555f4bb11861..07ca415564adca 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -5745,6 +5745,10 @@ void CodeGen::genFnProlog() #else genEnregisterOSRArgsAndLocals(); #endif + // OSR functions take no parameters in registers. Ensure no mappings + // are present. + //assert((compiler->m_paramRegLocalMappings == nullptr) || compiler->m_paramRegLocalMappings->Empty()); + compiler->lvaUpdateArgsWithInitialReg(); } else diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 38fa10167c48a9..243147274cef44 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -4246,7 +4246,7 @@ inline void Compiler::CLR_API_Leave(API_ICorJitInfo_Names ename) bool Compiler::fgVarIsNeverZeroInitializedInProlog(unsigned varNum) { LclVarDsc* varDsc = lvaGetDesc(varNum); - bool result = varDsc->lvIsParam || lvaIsOSRLocal(varNum) || (varNum == lvaGSSecurityCookie) || + bool result = varDsc->lvIsParam || varDsc->lvIsParamRegTarget || lvaIsOSRLocal(varNum) || (varNum == lvaGSSecurityCookie) || (varNum == lvaInlinedPInvokeFrameVar) || (varNum == lvaStubArgumentVar) || (varNum == lvaRetAddrVar); #ifdef TARGET_ARM64 diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 2e212085da8a92..d88ce324493bd4 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -4938,8 +4938,8 @@ void Compiler::lvaComputeRefCounts(bool isRecompute, bool setSlotNumbers) // that was set by past phases. if (!isRecompute) { - varDsc->lvSingleDef = varDsc->lvIsParam; - varDsc->lvSingleDefRegCandidate = varDsc->lvIsParam; + varDsc->lvSingleDef = varDsc->lvIsParam || varDsc->lvIsParamRegTarget; + varDsc->lvSingleDefRegCandidate = varDsc->lvIsParam || varDsc->lvIsParamRegTarget; varDsc->lvAllDefsAreNoGc = (varDsc->lvImplicitlyReferenced == false); } @@ -5033,6 +5033,12 @@ void Compiler::lvaComputeRefCounts(bool isRecompute, bool setSlotNumbers) } } + if (varDsc->lvIsParamRegTarget && (varDsc->lvRefCnt() > 0)) + { + varDsc->incRefCnts(BB_UNITY_WEIGHT, this); + varDsc->incRefCnts(BB_UNITY_WEIGHT, this); + } + // If we have JMP, all arguments must have a location // even if we don't use them inside the method if (compJmpOpUsed && varDsc->lvIsParam && (varDsc->lvRefCnt() == 0)) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 8b7b3dd890cd88..98b65f49931f38 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7959,6 +7959,11 @@ void Lowering::MapParameterRegisterLocals() } } + if (!comp->opts.IsOSR()) + { + FindInducedParameterRegisterLocals(); + } + #ifdef DEBUG if (comp->verbose) { @@ -7974,6 +7979,191 @@ void Lowering::MapParameterRegisterLocals() #endif } +//------------------------------------------------------------------------ +// Lowering::FindInducedParameterRegisterLocals: +// Find locals that would be profitable to map from parameter registers, +// based on IR in the initialization block. +// +void Lowering::FindInducedParameterRegisterLocals() +{ + LocalSet storedToLocals(comp->getAllocator(CMK_ABI)); + // Now look for optimization opportunities in the first block: places where + // we read fields out of struct parameters that can be mapped cleanly. This + // is frequently created by physical promotion. + for (GenTree* node : LIR::AsRange(comp->fgFirstBB)) + { + GenTreeLclVarCommon* storeLcl; + if (node->DefinesLocal(comp, &storeLcl)) + { + storedToLocals.Emplace(storeLcl->GetLclNum(), true); + continue; + } + + if (node->OperIs(GT_LCL_ADDR)) + { + storedToLocals.Emplace(node->AsLclVarCommon()->GetLclNum(), true); + continue; + } + + if (!node->OperIs(GT_LCL_FLD)) + { + continue; + } + + GenTreeLclFld* fld = node->AsLclFld(); + if (fld->GetLclNum() >= comp->info.compArgsCount) + { + continue; + } + + if (storedToLocals.Lookup(fld->GetLclNum())) + { + // LCL_FLD does not necessarily take the value of the parameter + // anymore. + continue; + } + + const ABIPassingInformation& dataAbiInfo = comp->lvaGetParameterABIInfo(fld->GetLclNum()); + const ABIPassingSegment* regSegment = nullptr; + for (const ABIPassingSegment& segment : dataAbiInfo.Segments()) + { + if (!segment.IsPassedInRegister()) + { + continue; + } + + if ((segment.Offset != fld->GetLclOffs()) || (segment.Size != genTypeSize(fld)) || (varTypeUsesIntReg(fld) != genIsValidIntReg(segment.GetRegister()))) + { + continue; + } + + // This is a match, but check if it is already remapped. + // TODO-CQ: If it is already remapped, we can reuse the value from + // the remapping. + if (comp->FindParameterRegisterLocalMappingByRegister(segment.GetRegister()) == nullptr) + { + regSegment = &segment; + } + + break; + } + + if (regSegment == nullptr) + { + continue; + } + + JITDUMP("LCL_FLD use [%06u] of unenregisterable parameter corresponds to ", Compiler::dspTreeID(fld)); + DBEXEC(VERBOSE, regSegment->Dump()); + JITDUMP("\n"); + + // Now see if we want to introduce a new local for this value, or if we + // can reuse one because this is the source of a store (frequently + // created by physical promotion). + LIR::Use use; + if (!LIR::AsRange(comp->fgFirstBB).TryGetUse(fld, &use)) + { + JITDUMP(" ..but no use was found\n"); + continue; + } + + unsigned remappedLclNum = TryReuseLocalForParameterAccess(use, storedToLocals); + + if (remappedLclNum == BAD_VAR_NUM) + { + remappedLclNum = comp->lvaGrabTemp(false DEBUGARG(comp->printfAlloc("struct parameter register %s", getRegName(regSegment->GetRegister())))); + comp->lvaGetDesc(remappedLclNum)->lvType = fld->TypeGet(); + JITDUMP("Created new local V%02u for the mapping\n", remappedLclNum); + } + else + { + JITDUMP("Reusing local V%02u for store from struct parameter register %s. Store:\n", remappedLclNum, getRegName(regSegment->GetRegister())); + DISPTREERANGE(LIR::AsRange(comp->fgFirstBB), use.User()); + + // The store will be a no-op, so get rid of it + LIR::AsRange(comp->fgFirstBB).Remove(use.User(), true); + use = LIR::Use(); + } + + comp->m_paramRegLocalMappings->Emplace(regSegment, remappedLclNum, 0); + comp->lvaGetDesc(remappedLclNum)->lvIsParamRegTarget = true; + + JITDUMP("New mapping: "); + DBEXEC(VERBOSE, regSegment->Dump()); + JITDUMP(" -> V%02u\n", remappedLclNum); + + GenTree* paramRegValue = comp->gtNewLclvNode(remappedLclNum, genActualType(fld)); + GenTree* storeField = comp->gtNewStoreLclFldNode(fld->GetLclNum(), fld->TypeGet(), regSegment->Offset, paramRegValue); + + // Store actual parameter local from new reg local + LIR::AsRange(comp->fgFirstBB).InsertAtBeginning(LIR::SeqTree(comp, storeField)); + LowerNode(paramRegValue); + LowerNode(storeField); + + JITDUMP("Parameter spill:\n"); + DISPTREERANGE(LIR::AsRange(comp->fgFirstBB), storeField); + + // Insert explicit normalization for small types (the LCL_FLD we + // are replacing comes with this normalization). + if (varTypeIsSmall(fld)) + { + GenTree* lcl = comp->gtNewLclvNode(remappedLclNum, genActualType(fld)); + GenTree* normalizeLcl = comp->gtNewCastNode(TYP_INT, lcl, false, fld->TypeGet()); + GenTree* storeNormalizedLcl = comp->gtNewStoreLclVarNode(remappedLclNum, normalizeLcl); + LIR::AsRange(comp->fgFirstBB).InsertAtBeginning(LIR::SeqTree(comp, storeNormalizedLcl)); + LowerNode(lcl); + LowerNode(normalizeLcl); + LowerNode(storeNormalizedLcl); + + JITDUMP("Parameter normalization:\n"); + DISPTREERANGE(LIR::AsRange(comp->fgFirstBB), storeNormalizedLcl); + } + + // If we still have a valid use, then replace the LCL_FLD with a + // LCL_VAR of the remapped parameter register local. + if (use.IsInitialized()) + { + GenTree* lcl = comp->gtNewLclvNode(remappedLclNum, genActualType(fld)); + LIR::AsRange(comp->fgFirstBB).InsertAfter(fld, lcl); + use.ReplaceWith(lcl); + LowerNode(lcl); + JITDUMP("New user tree range:\n"); + DISPTREERANGE(LIR::AsRange(comp->fgFirstBB), use.User()); + fld->gtBashToNOP(); + } + } +} + +unsigned Lowering::TryReuseLocalForParameterAccess(const LIR::Use& use, const LocalSet& storedToLocals) +{ + GenTree* useNode = use.User(); + + if (!useNode->OperIs(GT_STORE_LCL_VAR)) + { + return BAD_VAR_NUM; + } + + LclVarDsc* destLclDsc = comp->lvaGetDesc(useNode->AsLclVarCommon()); + + if (destLclDsc->lvIsParamRegTarget) + { + return BAD_VAR_NUM; + } + + if (destLclDsc->TypeGet() == TYP_STRUCT) + { + return BAD_VAR_NUM; + } + + if (storedToLocals.Lookup(useNode->AsLclVarCommon()->GetLclNum())) + { + // Destination may change value before this access + return BAD_VAR_NUM; + } + + return useNode->AsLclVarCommon()->GetLclNum(); +} + #ifdef DEBUG //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index 659870c844cd73..8ab242e9cd9eea 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -132,7 +132,11 @@ class Lowering final : public Phase static bool CheckBlock(Compiler* compiler, BasicBlock* block); #endif // DEBUG + typedef JitHashTable, bool> LocalSet; + void MapParameterRegisterLocals(); + void FindInducedParameterRegisterLocals(); + unsigned TryReuseLocalForParameterAccess(const LIR::Use& use, const LocalSet& storedToLocals); void LowerBlock(BasicBlock* block); GenTree* LowerNode(GenTree* node); diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index 4d4460b9dea947..bd75af2ce97204 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -1453,7 +1453,7 @@ void LinearScan::identifyCandidatesExceptionDataflow() assert(varDsc->lvLiveInOutOfHndlr); - if (varTypeIsGC(varDsc) && VarSetOps::IsMember(compiler, finallyVars, varIndex) && !varDsc->lvIsParam) + if (varTypeIsGC(varDsc) && VarSetOps::IsMember(compiler, finallyVars, varIndex) && !varDsc->lvIsParam && !varDsc->lvIsParamRegTarget) { assert(varDsc->lvMustInit); } diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 4cf182f374ebbd..f52eec0d90b166 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2066,7 +2066,7 @@ void LinearScan::insertZeroInitRefPositions() while (iter.NextElem(&varIndex)) { LclVarDsc* varDsc = compiler->lvaGetDescByTrackedIndex(varIndex); - if (!varDsc->lvIsParam && isCandidateVar(varDsc)) + if (!varDsc->lvIsParam && !varDsc->lvIsParamRegTarget && isCandidateVar(varDsc)) { JITDUMP("V%02u is a finally var:", compiler->lvaTrackedIndexToLclNum(varIndex)); Interval* interval = getIntervalForLocalVar(varIndex); diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index a1baf4cc3768ec..2fa6f1664dcff4 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -767,13 +767,36 @@ class LocalUses unsigned countReadBacks = 0; weight_t countReadBacksWtd = 0; - // For parameters or OSR locals we always need one read back. - if (lcl->lvIsParam || lcl->lvIsOSRLocal) + + // For OSR locals we always need one read back. + if (lcl->lvIsOSRLocal) { countReadBacks++; countReadBacksWtd += comp->fgFirstBB->getBBWeight(comp); } + // For parameters, the backend may be able to map it directly from a register. + if (lcl->lvIsParam) + { + if (MapsToRegister(comp, access, lclNum)) + { + // No promotion will result in a store to stack in the prolog. + costWithout += COST_STRUCT_ACCESS_CYCLES * comp->fgFirstBB->getBBWeight(comp); + sizeWithout += COST_STRUCT_ACCESS_SIZE; + + // Promotion we cost like the normal reg accesses above + costWith += COST_REG_ACCESS_CYCLES * comp->fgFirstBB->getBBWeight(comp); + sizeWith += COST_REG_ACCESS_SIZE; + } + else + { + // Otherwise we expect no prolog work to be required if we + // don't promote, and we need a read back from the stack. + countReadBacks++; + countReadBacksWtd += comp->fgFirstBB->getBBWeight(comp); + } + } + // If the struct is stored from a call (either due to a multireg // return or by being passed as the retbuffer) then we need a readback // after. @@ -1000,6 +1023,46 @@ class LocalUses return nullptr; } + + //------------------------------------------------------------------------ + // MapsToRegister: + // Check if a specific access in the specified parameter local is + // expected to map to a register. + // + // Parameters: + // comp - Compiler instance + // access - Access in the local + // lclNum - Parameter lcl num + // + // Returns: + // Pointer to a matching access, or nullptr if no match was found. + // + bool MapsToRegister(Compiler* comp, const Access& access, unsigned lclNum) + { + assert(lclNum < comp->info.compArgsCount); + + if (comp->lvaGetDesc(lclNum)->lvIsImplicitByRef) + { + return false; + } + + const ABIPassingInformation& abiInfo = comp->lvaGetParameterABIInfo(lclNum); + if (abiInfo.HasAnyStackSegment()) + { + return false; + } + + for (const ABIPassingSegment& seg : abiInfo.Segments()) + { + if ((access.Offset == seg.Offset) && (genTypeSize(access.AccessType) == seg.Size) && + (varTypeUsesIntReg(access.AccessType) == genIsValidIntReg(seg.GetRegister()))) + { + return true; + } + } + + return false; + } }; // Struct used to save all struct stores involving physical promotion candidates. From 5f626abd1ed136c2e30c567a26a393d575715129 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 18 Dec 2024 19:46:06 +0100 Subject: [PATCH 09/35] Parameters in OSR functions cannot be mapped --- src/coreclr/jit/promotion.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 2fa6f1664dcff4..96f6c8c1c349d3 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -774,10 +774,9 @@ class LocalUses countReadBacks++; countReadBacksWtd += comp->fgFirstBB->getBBWeight(comp); } - - // For parameters, the backend may be able to map it directly from a register. - if (lcl->lvIsParam) + else if (lcl->lvIsParam) { + // For parameters, the backend may be able to map it directly from a register. if (MapsToRegister(comp, access, lclNum)) { // No promotion will result in a store to stack in the prolog. From 1283951664b8977f08850df4e50ebf5105d6642f Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Wed, 18 Dec 2024 20:45:50 +0100 Subject: [PATCH 10/35] Fix build on platforms with implicit byrefs, run jit-format --- src/coreclr/jit/codegencommon.cpp | 2 +- src/coreclr/jit/compiler.hpp | 5 +++-- src/coreclr/jit/lower.cpp | 18 +++++++++++------- src/coreclr/jit/lower.h | 4 ++-- src/coreclr/jit/lsra.cpp | 3 ++- src/coreclr/jit/promotion.cpp | 2 +- 6 files changed, 20 insertions(+), 14 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 07ca415564adca..9af704c02b29a5 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -5747,7 +5747,7 @@ void CodeGen::genFnProlog() #endif // OSR functions take no parameters in registers. Ensure no mappings // are present. - //assert((compiler->m_paramRegLocalMappings == nullptr) || compiler->m_paramRegLocalMappings->Empty()); + // assert((compiler->m_paramRegLocalMappings == nullptr) || compiler->m_paramRegLocalMappings->Empty()); compiler->lvaUpdateArgsWithInitialReg(); } diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp index 243147274cef44..5ebda933729052 100644 --- a/src/coreclr/jit/compiler.hpp +++ b/src/coreclr/jit/compiler.hpp @@ -4246,8 +4246,9 @@ inline void Compiler::CLR_API_Leave(API_ICorJitInfo_Names ename) bool Compiler::fgVarIsNeverZeroInitializedInProlog(unsigned varNum) { LclVarDsc* varDsc = lvaGetDesc(varNum); - bool result = varDsc->lvIsParam || varDsc->lvIsParamRegTarget || lvaIsOSRLocal(varNum) || (varNum == lvaGSSecurityCookie) || - (varNum == lvaInlinedPInvokeFrameVar) || (varNum == lvaStubArgumentVar) || (varNum == lvaRetAddrVar); + bool result = varDsc->lvIsParam || varDsc->lvIsParamRegTarget || lvaIsOSRLocal(varNum) || + (varNum == lvaGSSecurityCookie) || (varNum == lvaInlinedPInvokeFrameVar) || + (varNum == lvaStubArgumentVar) || (varNum == lvaRetAddrVar); #ifdef TARGET_ARM64 result = result || (varNum == lvaFfrRegister); diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 98b65f49931f38..6602955d924bd0 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -8024,7 +8024,7 @@ void Lowering::FindInducedParameterRegisterLocals() } const ABIPassingInformation& dataAbiInfo = comp->lvaGetParameterABIInfo(fld->GetLclNum()); - const ABIPassingSegment* regSegment = nullptr; + const ABIPassingSegment* regSegment = nullptr; for (const ABIPassingSegment& segment : dataAbiInfo.Segments()) { if (!segment.IsPassedInRegister()) @@ -8032,7 +8032,8 @@ void Lowering::FindInducedParameterRegisterLocals() continue; } - if ((segment.Offset != fld->GetLclOffs()) || (segment.Size != genTypeSize(fld)) || (varTypeUsesIntReg(fld) != genIsValidIntReg(segment.GetRegister()))) + if ((segment.Offset != fld->GetLclOffs()) || (segment.Size != genTypeSize(fld)) || + (varTypeUsesIntReg(fld) != genIsValidIntReg(segment.GetRegister()))) { continue; } @@ -8071,13 +8072,15 @@ void Lowering::FindInducedParameterRegisterLocals() if (remappedLclNum == BAD_VAR_NUM) { - remappedLclNum = comp->lvaGrabTemp(false DEBUGARG(comp->printfAlloc("struct parameter register %s", getRegName(regSegment->GetRegister())))); + remappedLclNum = comp->lvaGrabTemp(false DEBUGARG( + comp->printfAlloc("struct parameter register %s", getRegName(regSegment->GetRegister())))); comp->lvaGetDesc(remappedLclNum)->lvType = fld->TypeGet(); JITDUMP("Created new local V%02u for the mapping\n", remappedLclNum); } else { - JITDUMP("Reusing local V%02u for store from struct parameter register %s. Store:\n", remappedLclNum, getRegName(regSegment->GetRegister())); + JITDUMP("Reusing local V%02u for store from struct parameter register %s. Store:\n", remappedLclNum, + getRegName(regSegment->GetRegister())); DISPTREERANGE(LIR::AsRange(comp->fgFirstBB), use.User()); // The store will be a no-op, so get rid of it @@ -8093,7 +8096,8 @@ void Lowering::FindInducedParameterRegisterLocals() JITDUMP(" -> V%02u\n", remappedLclNum); GenTree* paramRegValue = comp->gtNewLclvNode(remappedLclNum, genActualType(fld)); - GenTree* storeField = comp->gtNewStoreLclFldNode(fld->GetLclNum(), fld->TypeGet(), regSegment->Offset, paramRegValue); + GenTree* storeField = + comp->gtNewStoreLclFldNode(fld->GetLclNum(), fld->TypeGet(), regSegment->Offset, paramRegValue); // Store actual parameter local from new reg local LIR::AsRange(comp->fgFirstBB).InsertAtBeginning(LIR::SeqTree(comp, storeField)); @@ -8107,8 +8111,8 @@ void Lowering::FindInducedParameterRegisterLocals() // are replacing comes with this normalization). if (varTypeIsSmall(fld)) { - GenTree* lcl = comp->gtNewLclvNode(remappedLclNum, genActualType(fld)); - GenTree* normalizeLcl = comp->gtNewCastNode(TYP_INT, lcl, false, fld->TypeGet()); + GenTree* lcl = comp->gtNewLclvNode(remappedLclNum, genActualType(fld)); + GenTree* normalizeLcl = comp->gtNewCastNode(TYP_INT, lcl, false, fld->TypeGet()); GenTree* storeNormalizedLcl = comp->gtNewStoreLclVarNode(remappedLclNum, normalizeLcl); LIR::AsRange(comp->fgFirstBB).InsertAtBeginning(LIR::SeqTree(comp, storeNormalizedLcl)); LowerNode(lcl); diff --git a/src/coreclr/jit/lower.h b/src/coreclr/jit/lower.h index 8ab242e9cd9eea..589cc6aff3824e 100644 --- a/src/coreclr/jit/lower.h +++ b/src/coreclr/jit/lower.h @@ -134,8 +134,8 @@ class Lowering final : public Phase typedef JitHashTable, bool> LocalSet; - void MapParameterRegisterLocals(); - void FindInducedParameterRegisterLocals(); + void MapParameterRegisterLocals(); + void FindInducedParameterRegisterLocals(); unsigned TryReuseLocalForParameterAccess(const LIR::Use& use, const LocalSet& storedToLocals); void LowerBlock(BasicBlock* block); diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index bd75af2ce97204..29a8f0edb3339f 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -1453,7 +1453,8 @@ void LinearScan::identifyCandidatesExceptionDataflow() assert(varDsc->lvLiveInOutOfHndlr); - if (varTypeIsGC(varDsc) && VarSetOps::IsMember(compiler, finallyVars, varIndex) && !varDsc->lvIsParam && !varDsc->lvIsParamRegTarget) + if (varTypeIsGC(varDsc) && VarSetOps::IsMember(compiler, finallyVars, varIndex) && !varDsc->lvIsParam && + !varDsc->lvIsParamRegTarget) { assert(varDsc->lvMustInit); } diff --git a/src/coreclr/jit/promotion.cpp b/src/coreclr/jit/promotion.cpp index 96f6c8c1c349d3..6e086c272d2a5c 100644 --- a/src/coreclr/jit/promotion.cpp +++ b/src/coreclr/jit/promotion.cpp @@ -1040,7 +1040,7 @@ class LocalUses { assert(lclNum < comp->info.compArgsCount); - if (comp->lvaGetDesc(lclNum)->lvIsImplicitByRef) + if (comp->lvaIsImplicitByRefLocal(lclNum)) { return false; } From 0792ab5a5d5c38427bdc08be3bd7e13bfb7bb2e4 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 19 Dec 2024 12:44:05 +0100 Subject: [PATCH 11/35] Spill to both parameter and new mappings For GC refs in structs these have to be zeroed anyway, so we might as well just spill to the original parameter as well as the new mapping. This means we can avoid the manually inserted spill in the init block, and we retain the property that parameters are fully defined by the prolog. --- src/coreclr/jit/codegen.h | 7 +++-- src/coreclr/jit/codegencommon.cpp | 50 +++++++++++++++++++++---------- src/coreclr/jit/lclvars.cpp | 3 +- src/coreclr/jit/lower.cpp | 24 +++++++-------- 4 files changed, 49 insertions(+), 35 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 8d6d2ef5684cc4..75694d30a38193 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -260,9 +260,10 @@ class CodeGen final : public CodeGenInterface regMaskTP genGetParameterHomingTempRegisterCandidates(); var_types genParamStackType(LclVarDsc* dsc, const ABIPassingSegment& seg); - void genSpillOrAddRegisterParam(unsigned lclNum, const ABIPassingSegment& seg, class RegGraph* graph); - void genSpillOrAddNonStandardRegisterParam(unsigned lclNum, regNumber sourceReg, class RegGraph* graph); - void genEnregisterIncomingStackArgs(); + void genSpillOrAddRegisterParam( + unsigned lclNum, unsigned offset, unsigned paramLclNum, const ABIPassingSegment& seg, class RegGraph* graph); + void genSpillOrAddNonStandardRegisterParam(unsigned lclNum, regNumber sourceReg, class RegGraph* graph); + void genEnregisterIncomingStackArgs(); #if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) void genEnregisterOSRArgsAndLocals(regNumber initReg, bool* pInitRegZeroed); #else diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 9af704c02b29a5..4f88c798eb754f 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -3239,26 +3239,25 @@ var_types CodeGen::genParamStackType(LclVarDsc* dsc, const ABIPassingSegment& se // to stack immediately, or by adding it to the register graph. // // Parameters: -// lclNum - Parameter local (or field of it) -// segment - Register segment to either spill or put in the register graph -// graph - The register graph to add to -// -void CodeGen::genSpillOrAddRegisterParam(unsigned lclNum, const ABIPassingSegment& segment, RegGraph* graph) +// lclNum - Target local +// offset - Offset into the target local +// paramLclNum - Local that is the actual parameter that has the incoming register +// segment - Register segment to either spill or put in the register graph +// graph - The register graph to add to +// +void CodeGen::genSpillOrAddRegisterParam( + unsigned lclNum, unsigned offset, unsigned paramLclNum, const ABIPassingSegment& segment, RegGraph* graph) { - regMaskTP paramRegs = intRegState.rsCalleeRegArgMaskLiveIn | floatRegState.rsCalleeRegArgMaskLiveIn; - LclVarDsc* varDsc = compiler->lvaGetDesc(lclNum); - - unsigned baseOffset = varDsc->lvIsStructField ? varDsc->lvFldOffset : 0; - unsigned size = varDsc->lvExactSize(); + regMaskTP paramRegs = intRegState.rsCalleeRegArgMaskLiveIn | floatRegState.rsCalleeRegArgMaskLiveIn; if (!segment.IsPassedInRegister() || ((paramRegs & genRegMask(segment.GetRegister())) == 0)) { return; } + LclVarDsc* varDsc = compiler->lvaGetDesc(lclNum); if (varDsc->lvOnFrame && (!varDsc->lvIsInReg() || varDsc->lvLiveInOutOfHndlr)) { - unsigned paramLclNum = varDsc->lvIsStructField ? varDsc->lvParentLcl : lclNum; LclVarDsc* paramVarDsc = compiler->lvaGetDesc(paramLclNum); var_types storeType = genParamStackType(paramVarDsc, segment); @@ -3269,7 +3268,7 @@ void CodeGen::genSpillOrAddRegisterParam(unsigned lclNum, const ABIPassingSegmen } GetEmitter()->emitIns_S_R(ins_Store(storeType), emitActualTypeSize(storeType), segment.GetRegister(), lclNum, - segment.Offset - baseOffset); + offset); } if (!varDsc->lvIsInReg()) @@ -3289,7 +3288,7 @@ void CodeGen::genSpillOrAddRegisterParam(unsigned lclNum, const ABIPassingSegmen RegNode* sourceReg = graph->GetOrAdd(segment.GetRegister()); RegNode* destReg = graph->GetOrAdd(varDsc->GetRegNum()); - if ((sourceReg != destReg) || (baseOffset != segment.Offset)) + if ((sourceReg != destReg) || (offset != 0)) { #ifdef TARGET_ARM if (edgeType == TYP_DOUBLE) @@ -3303,7 +3302,7 @@ void CodeGen::genSpillOrAddRegisterParam(unsigned lclNum, const ABIPassingSegmen return; } #endif - graph->AddEdge(sourceReg, destReg, edgeType, segment.Offset - baseOffset); + graph->AddEdge(sourceReg, destReg, edgeType, offset); } } @@ -3395,6 +3394,12 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) // top of the underlying registers. RegGraph graph(compiler); + // Add everything to the graph, or spill directly to stack when needed. + // Note that some registers may be homed in multiple (stack) places. + // Particularly if there is a mapping to a local that does not share its + // (stack) home with the parameter local, in which case we will home it + // both into the parameter local's stack home (if it is used), but also to + // the mapping target. for (unsigned lclNum = 0; lclNum < compiler->info.compArgsCount; lclNum++) { LclVarDsc* lclDsc = compiler->lvaGetDesc(lclNum); @@ -3410,8 +3415,21 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) const ParameterRegisterLocalMapping* mapping = compiler->FindParameterRegisterLocalMappingByRegister(segment.GetRegister()); - unsigned fieldLclNum = mapping != nullptr ? mapping->LclNum : lclNum; - genSpillOrAddRegisterParam(fieldLclNum, segment, &graph); + if (mapping != nullptr) + { + genSpillOrAddRegisterParam(mapping->LclNum, mapping->Offset, lclNum, segment, &graph); + + // If home is not shared with base local, then also spill to + // the base local. + if (!lclDsc->lvPromoted) + { + genSpillOrAddRegisterParam(lclNum, segment.Offset, lclNum, segment, &graph); + } + } + else + { + genSpillOrAddRegisterParam(lclNum, segment.Offset, lclNum, segment, &graph); + } } } diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index d88ce324493bd4..344f018d6e17db 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -5032,8 +5032,7 @@ void Compiler::lvaComputeRefCounts(bool isRecompute, bool setSlotNumbers) varDsc->incRefCnts(BB_UNITY_WEIGHT, this); } } - - if (varDsc->lvIsParamRegTarget && (varDsc->lvRefCnt() > 0)) + else if (varDsc->lvIsParamRegTarget && (varDsc->lvRefCnt() > 0)) { varDsc->incRefCnts(BB_UNITY_WEIGHT, this); varDsc->incRefCnts(BB_UNITY_WEIGHT, this); diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 6602955d924bd0..c34223b323e792 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7973,7 +7973,7 @@ void Lowering::MapParameterRegisterLocals() const ParameterRegisterLocalMapping& mapping = comp->m_paramRegLocalMappings->BottomRef(i); printf(" "); mapping.RegisterSegment->Dump(); - printf(" -> V%02u\n", mapping.LclNum); + printf(" -> V%02u+%u\n", mapping.LclNum, mapping.Offset); } } #endif @@ -8016,6 +8016,11 @@ void Lowering::FindInducedParameterRegisterLocals() continue; } + if (fld->TypeIs(TYP_STRUCT)) + { + continue; + } + if (storedToLocals.Lookup(fld->GetLclNum())) { // LCL_FLD does not necessarily take the value of the parameter @@ -8032,7 +8037,10 @@ void Lowering::FindInducedParameterRegisterLocals() continue; } - if ((segment.Offset != fld->GetLclOffs()) || (segment.Size != genTypeSize(fld)) || + assert(fld->GetLclOffs() <= comp->lvaLclExactSize(fld->GetLclNum())); + unsigned structAccessedSize = + min(genTypeSize(fld), comp->lvaLclExactSize(fld->GetLclNum()) - fld->GetLclOffs()); + if ((segment.Offset != fld->GetLclOffs()) || (structAccessedSize > segment.Size) || (varTypeUsesIntReg(fld) != genIsValidIntReg(segment.GetRegister()))) { continue; @@ -8095,18 +8103,6 @@ void Lowering::FindInducedParameterRegisterLocals() DBEXEC(VERBOSE, regSegment->Dump()); JITDUMP(" -> V%02u\n", remappedLclNum); - GenTree* paramRegValue = comp->gtNewLclvNode(remappedLclNum, genActualType(fld)); - GenTree* storeField = - comp->gtNewStoreLclFldNode(fld->GetLclNum(), fld->TypeGet(), regSegment->Offset, paramRegValue); - - // Store actual parameter local from new reg local - LIR::AsRange(comp->fgFirstBB).InsertAtBeginning(LIR::SeqTree(comp, storeField)); - LowerNode(paramRegValue); - LowerNode(storeField); - - JITDUMP("Parameter spill:\n"); - DISPTREERANGE(LIR::AsRange(comp->fgFirstBB), storeField); - // Insert explicit normalization for small types (the LCL_FLD we // are replacing comes with this normalization). if (varTypeIsSmall(fld)) From 3279c71d0f862b3bf7330de52763fa56305d5e08 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 19 Dec 2024 12:48:05 +0100 Subject: [PATCH 12/35] Fix arm32 build --- src/coreclr/jit/codegencommon.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 4f88c798eb754f..7240cbbcee23fa 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -3293,7 +3293,7 @@ void CodeGen::genSpillOrAddRegisterParam( #ifdef TARGET_ARM if (edgeType == TYP_DOUBLE) { - assert(segment.Offset == baseOffset); + assert(offset == 0); graph->AddEdge(sourceReg, destReg, TYP_FLOAT, 0); sourceReg = graph->GetOrAdd(REG_NEXT(sourceReg->reg)); From 2b8cf7497e71d2c03decf16a340e44a7f24f7700 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 19 Dec 2024 12:56:36 +0100 Subject: [PATCH 13/35] Induce mappings before lowering runs --- src/coreclr/jit/lower.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index c34223b323e792..be4c156ed85191 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7835,6 +7835,11 @@ PhaseStatus Lowering::DoPhase() comp->lvSetMinOptsDoNotEnreg(); } + if (comp->opts.OptimizationEnabled()) + { + MapParameterRegisterLocals(); + } + for (BasicBlock* const block : comp->Blocks()) { /* Make the block publicly available */ @@ -7850,11 +7855,6 @@ PhaseStatus Lowering::DoPhase() LowerBlock(block); } - if (comp->opts.OptimizationEnabled()) - { - MapParameterRegisterLocals(); - } - #ifdef DEBUG JITDUMP("Lower has completed modifying nodes.\n"); if (VERBOSE) From 22b755dc9fbf44d280d658c05a3a61d9cead4ef6 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 19 Dec 2024 16:06:11 +0100 Subject: [PATCH 14/35] Avoid double lowering, always back to nop --- src/coreclr/jit/lower.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index be4c156ed85191..ff2fcce5c09373 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -8111,9 +8111,6 @@ void Lowering::FindInducedParameterRegisterLocals() GenTree* normalizeLcl = comp->gtNewCastNode(TYP_INT, lcl, false, fld->TypeGet()); GenTree* storeNormalizedLcl = comp->gtNewStoreLclVarNode(remappedLclNum, normalizeLcl); LIR::AsRange(comp->fgFirstBB).InsertAtBeginning(LIR::SeqTree(comp, storeNormalizedLcl)); - LowerNode(lcl); - LowerNode(normalizeLcl); - LowerNode(storeNormalizedLcl); JITDUMP("Parameter normalization:\n"); DISPTREERANGE(LIR::AsRange(comp->fgFirstBB), storeNormalizedLcl); @@ -8129,8 +8126,9 @@ void Lowering::FindInducedParameterRegisterLocals() LowerNode(lcl); JITDUMP("New user tree range:\n"); DISPTREERANGE(LIR::AsRange(comp->fgFirstBB), use.User()); - fld->gtBashToNOP(); } + + fld->gtBashToNOP(); } } From 43f6ba32eb1f2d2cc7892416334405e4d4455e4a Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 19 Dec 2024 16:06:20 +0100 Subject: [PATCH 15/35] Add check for DNER when reusing local If they are DNER they definitely will not stay in a register --- src/coreclr/jit/lower.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index ff2fcce5c09373..edbeb488e5e394 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -8153,6 +8153,11 @@ unsigned Lowering::TryReuseLocalForParameterAccess(const LIR::Use& use, const Lo return BAD_VAR_NUM; } + if (destLclDsc->lvDoNotEnregister) + { + return BAD_VAR_NUM; + } + if (storedToLocals.Lookup(useNode->AsLclVarCommon()->GetLclNum())) { // Destination may change value before this access From 1d3aba60f9dcc397edfea4525a60d20e327b83ac Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 19 Dec 2024 16:08:43 +0100 Subject: [PATCH 16/35] Add function header --- src/coreclr/jit/lower.cpp | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index edbeb488e5e394..896c66338df7a7 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -8132,6 +8132,20 @@ void Lowering::FindInducedParameterRegisterLocals() } } +//------------------------------------------------------------------------ +// Lowering::TryReuseLocalForParameterAccess: +// Try to figure out if a LCL_FLD that corresponds to a parameter register is +// being stored directly to a LCL_VAR, and in that case whether it would be +// profitable to reuse that local as the parameter register. +// +// Parameters: +// use - The use of the LCL_FLD +// storedToLocals - Map of locals that have had potential definitions to them +// up until the use +// +// Returns: +// The local number to reuse, or BAD_VAR_NUM to create a new local instead. +// unsigned Lowering::TryReuseLocalForParameterAccess(const LIR::Use& use, const LocalSet& storedToLocals) { GenTree* useNode = use.User(); From 6f21d49408ba8bb134248338f291e1eeaba3ae63 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 19 Dec 2024 16:37:01 +0100 Subject: [PATCH 17/35] Ensure we check both the parameter and mapped target when determining liveness --- src/coreclr/jit/lsrabuild.cpp | 35 ++++++++++++++++++++++++++--------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index f52eec0d90b166..5ccc8188fa08a0 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2177,6 +2177,7 @@ void LinearScan::buildIntervals() for (unsigned lclNum = 0; lclNum < compiler->info.compArgsCount; lclNum++) { + LclVarDsc* lcl = compiler->lvaGetDesc(lclNum); const ABIPassingInformation& abiInfo = compiler->lvaGetParameterABIInfo(lclNum); for (const ABIPassingSegment& seg : abiInfo.Segments()) { @@ -2188,18 +2189,34 @@ void LinearScan::buildIntervals() const ParameterRegisterLocalMapping* mapping = compiler->FindParameterRegisterLocalMappingByRegister(seg.GetRegister()); - unsigned mappedLclNum = mapping != nullptr ? mapping->LclNum : lclNum; - JITDUMP("Arg V%02u in reg %s\n", mappedLclNum, getRegName(seg.GetRegister())); - LclVarDsc* argDsc = compiler->lvaGetDesc(mappedLclNum); - if (argDsc->lvTracked && !compiler->compJmpOpUsed && (argDsc->lvRefCnt() == 0) && - !compiler->opts.compDbgCode) + JITDUMP("Arg V%02u in reg %s\n", mapping != nullptr ? mapping->LclNum : lclNum, getRegName(seg.GetRegister())); + + bool isParameterLive = !lcl->lvTracked || compiler->compJmpOpUsed || (lcl->lvRefCnt() != 0); + bool isLive; + if (mapping != nullptr) { - // Not live - continue; + LclVarDsc* mappedLcl = compiler->lvaGetDesc(mapping->LclNum); + bool isMappedLclLive = !mappedLcl->lvTracked || compiler->compJmpOpUsed || (mappedLcl->lvRefCnt() != 0); + if (mappedLcl->lvIsStructField) + { + // Struct fields are not saved into their parameter local + isLive = isMappedLclLive; + } + else + { + isLive = isParameterLive || isMappedLclLive; + } + } + else + { + isLive = isParameterLive; } - RegState* regState = genIsValidFloatReg(seg.GetRegister()) ? floatRegState : intRegState; - regState->rsCalleeRegArgMaskLiveIn |= seg.GetRegisterMask(); + if (isLive) + { + RegState* regState = genIsValidFloatReg(seg.GetRegister()) ? floatRegState : intRegState; + regState->rsCalleeRegArgMaskLiveIn |= seg.GetRegisterMask(); + } } } From 4f89b1addcdca8d90c8aee759ff365a5b6011349 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 19 Dec 2024 16:37:34 +0100 Subject: [PATCH 18/35] Change a lcl description --- src/coreclr/jit/lower.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 896c66338df7a7..3eaa3136fb2acc 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -8081,7 +8081,7 @@ void Lowering::FindInducedParameterRegisterLocals() if (remappedLclNum == BAD_VAR_NUM) { remappedLclNum = comp->lvaGrabTemp(false DEBUGARG( - comp->printfAlloc("struct parameter register %s", getRegName(regSegment->GetRegister())))); + comp->printfAlloc("parameter register V%02u.%s", fld->GetLclNum(), getRegName(regSegment->GetRegister())))); comp->lvaGetDesc(remappedLclNum)->lvType = fld->TypeGet(); JITDUMP("Created new local V%02u for the mapping\n", remappedLclNum); } From e47919d51c8056defd31c79c3565ca0e32df0b5a Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 19 Dec 2024 16:37:48 +0100 Subject: [PATCH 19/35] Skip promoted locals for optimization --- src/coreclr/jit/lower.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 3eaa3136fb2acc..2bd24c701c808e 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -8016,6 +8016,14 @@ void Lowering::FindInducedParameterRegisterLocals() continue; } + LclVarDsc* paramDsc = comp->lvaGetDesc(fld); + if (paramDsc->lvPromoted) + { + // These are complicated to reason about since they may be + // defined/used through their fields, so just skip them. + continue; + } + if (fld->TypeIs(TYP_STRUCT)) { continue; @@ -8162,6 +8170,11 @@ unsigned Lowering::TryReuseLocalForParameterAccess(const LIR::Use& use, const Lo return BAD_VAR_NUM; } + if (destLclDsc->lvIsStructField) + { + return BAD_VAR_NUM; + } + if (destLclDsc->TypeGet() == TYP_STRUCT) { return BAD_VAR_NUM; From cfff90046b09eb8648850a4c9aa923387f419429 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 20 Dec 2024 15:16:52 +0100 Subject: [PATCH 20/35] Unify Swift parameter register homing --- src/coreclr/jit/codegen.h | 2 +- src/coreclr/jit/codegencommon.cpp | 57 ++++--------------------------- src/coreclr/jit/codegenlinear.cpp | 2 +- 3 files changed, 9 insertions(+), 52 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index 75694d30a38193..645539b747a7d5 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -271,7 +271,7 @@ class CodeGen final : public CodeGenInterface #endif void genHomeStackSegment(unsigned lclNum, const ABIPassingSegment& seg, regNumber initReg, bool* pInitRegZeroed); - void genHomeSwiftStructParameters(bool handleStack); + void genHomeSwiftStructStackParameters(); void genHomeStackPartOfSplitParameter(regNumber initReg, bool* initRegStillZeroed); void genCheckUseBlockInit(); diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 7240cbbcee23fa..ebf25cfcff19f5 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -4343,7 +4343,7 @@ void CodeGen::genEnregisterOSRArgsAndLocals() #if defined(SWIFT_SUPPORT) || defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64) //----------------------------------------------------------------------------- -// genHomeSwiftStructParameters: Move the incoming segment to the local stack frame. +// genHomeSwiftStructParameters: Move the incoming stack segment to the local stack frame. // // Arguments: // lclNum - Number of local variable to home @@ -4406,14 +4406,11 @@ void CodeGen::genHomeStackSegment(unsigned lclNum, #ifdef SWIFT_SUPPORT //----------------------------------------------------------------------------- -// genHomeSwiftStructParameters: -// Reassemble Swift struct parameters if necessary. +// genHomeSwiftStructStackParameters: +// Reassemble Swift struct parameters from the segments that were passed on +// stack. // -// Arguments: -// handleStack - If true, reassemble the segments that were passed on the stack. -// If false, reassemble the segments that were passed in registers. -// -void CodeGen::genHomeSwiftStructParameters(bool handleStack) +void CodeGen::genHomeSwiftStructStackParameters() { for (unsigned lclNum = 0; lclNum < compiler->info.compArgsCount; lclNum++) { @@ -4428,33 +4425,13 @@ void CodeGen::genHomeSwiftStructParameters(bool handleStack) continue; } - JITDUMP("Homing Swift parameter V%02u: ", lclNum); + JITDUMP("Homing Swift parameter stack segments for V%02u: ", lclNum); const ABIPassingInformation& abiInfo = compiler->lvaGetParameterABIInfo(lclNum); DBEXEC(VERBOSE, abiInfo.Dump()); for (const ABIPassingSegment& seg : abiInfo.Segments()) { - if (seg.IsPassedOnStack() != handleStack) - { - continue; - } - - if (seg.IsPassedInRegister()) - { - RegState* regState = genIsValidFloatReg(seg.GetRegister()) ? &floatRegState : &intRegState; - regMaskTP regs = seg.GetRegisterMask(); - - if ((regState->rsCalleeRegArgMaskLiveIn & regs) != RBM_NONE) - { - var_types storeType = seg.GetRegisterType(); - assert(storeType != TYP_UNDEF); - GetEmitter()->emitIns_S_R(ins_Store(storeType), emitTypeSize(storeType), seg.GetRegister(), lclNum, - seg.Offset); - - regState->rsCalleeRegArgMaskLiveIn &= ~regs; - } - } - else + if (seg.IsPassedOnStack()) { // We can use REG_SCRATCH as a temporary register here as we ensured that during LSRA build. genHomeStackSegment(lclNum, seg, REG_SCRATCH, nullptr); @@ -5719,30 +5696,10 @@ void CodeGen::genFnProlog() #ifdef SWIFT_SUPPORT if (compiler->info.compCallConv == CorInfoCallConvExtension::Swift) { - if ((compiler->lvaSwiftSelfArg != BAD_VAR_NUM) && - ((intRegState.rsCalleeRegArgMaskLiveIn & RBM_SWIFT_SELF) != 0) && - compiler->lvaGetDesc(compiler->lvaSwiftSelfArg)->lvOnFrame) - { - GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SWIFT_SELF, compiler->lvaSwiftSelfArg, 0); - intRegState.rsCalleeRegArgMaskLiveIn &= ~RBM_SWIFT_SELF; - } - - if ((compiler->lvaSwiftIndirectResultArg != BAD_VAR_NUM) && - ((intRegState.rsCalleeRegArgMaskLiveIn & theFixedRetBuffMask(CorInfoCallConvExtension::Swift)) != 0) && - compiler->lvaGetDesc(compiler->lvaSwiftIndirectResultArg)->lvOnFrame) - { - GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, - theFixedRetBuffReg(CorInfoCallConvExtension::Swift), - compiler->lvaSwiftIndirectResultArg, 0); - intRegState.rsCalleeRegArgMaskLiveIn &= ~theFixedRetBuffMask(CorInfoCallConvExtension::Swift); - } - if (compiler->lvaSwiftErrorArg != BAD_VAR_NUM) { intRegState.rsCalleeRegArgMaskLiveIn &= ~RBM_SWIFT_ERROR; } - - genHomeSwiftStructParameters(/* handleStack */ false); } #endif diff --git a/src/coreclr/jit/codegenlinear.cpp b/src/coreclr/jit/codegenlinear.cpp index 1d6ac2301c45dd..819c31394bbd9f 100644 --- a/src/coreclr/jit/codegenlinear.cpp +++ b/src/coreclr/jit/codegenlinear.cpp @@ -388,7 +388,7 @@ void CodeGen::genCodeForBBlist() // codegen related to doing this, so it cannot be done in the prolog. if (block->IsFirst() && compiler->lvaHasAnySwiftStackParamToReassemble()) { - genHomeSwiftStructParameters(/* handleStack */ true); + genHomeSwiftStructStackParameters(); } #endif From f0e6d2d95fcc3b2e58c6236b45a0c52802cbd968 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 20 Dec 2024 15:18:36 +0100 Subject: [PATCH 21/35] Run jit-format --- src/coreclr/jit/lower.cpp | 5 +++-- src/coreclr/jit/lsrabuild.cpp | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 2bd24c701c808e..32a7e288003536 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -8088,8 +8088,9 @@ void Lowering::FindInducedParameterRegisterLocals() if (remappedLclNum == BAD_VAR_NUM) { - remappedLclNum = comp->lvaGrabTemp(false DEBUGARG( - comp->printfAlloc("parameter register V%02u.%s", fld->GetLclNum(), getRegName(regSegment->GetRegister())))); + remappedLclNum = + comp->lvaGrabTemp(false DEBUGARG(comp->printfAlloc("parameter register V%02u.%s", fld->GetLclNum(), + getRegName(regSegment->GetRegister())))); comp->lvaGetDesc(remappedLclNum)->lvType = fld->TypeGet(); JITDUMP("Created new local V%02u for the mapping\n", remappedLclNum); } diff --git a/src/coreclr/jit/lsrabuild.cpp b/src/coreclr/jit/lsrabuild.cpp index 5ccc8188fa08a0..79f4cae894110c 100644 --- a/src/coreclr/jit/lsrabuild.cpp +++ b/src/coreclr/jit/lsrabuild.cpp @@ -2177,7 +2177,7 @@ void LinearScan::buildIntervals() for (unsigned lclNum = 0; lclNum < compiler->info.compArgsCount; lclNum++) { - LclVarDsc* lcl = compiler->lvaGetDesc(lclNum); + LclVarDsc* lcl = compiler->lvaGetDesc(lclNum); const ABIPassingInformation& abiInfo = compiler->lvaGetParameterABIInfo(lclNum); for (const ABIPassingSegment& seg : abiInfo.Segments()) { @@ -2189,7 +2189,8 @@ void LinearScan::buildIntervals() const ParameterRegisterLocalMapping* mapping = compiler->FindParameterRegisterLocalMappingByRegister(seg.GetRegister()); - JITDUMP("Arg V%02u in reg %s\n", mapping != nullptr ? mapping->LclNum : lclNum, getRegName(seg.GetRegister())); + JITDUMP("Arg V%02u in reg %s\n", mapping != nullptr ? mapping->LclNum : lclNum, + getRegName(seg.GetRegister())); bool isParameterLive = !lcl->lvTracked || compiler->compJmpOpUsed || (lcl->lvRefCnt() != 0); bool isLive; From 4f06d3cd19f069d39f263f7b33b7656bc82d22f6 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 20 Dec 2024 16:26:30 +0100 Subject: [PATCH 22/35] Skip optimization for arm32 prespilled registers --- src/coreclr/jit/lower.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 32a7e288003536..6d8921dbf72f6b 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7971,9 +7971,7 @@ void Lowering::MapParameterRegisterLocals() for (int i = 0; i < comp->m_paramRegLocalMappings->Height(); i++) { const ParameterRegisterLocalMapping& mapping = comp->m_paramRegLocalMappings->BottomRef(i); - printf(" "); - mapping.RegisterSegment->Dump(); - printf(" -> V%02u+%u\n", mapping.LclNum, mapping.Offset); + printf(" %s -> V%02u+%u\n", getRegName(mapping.RegisterSegment->GetRegister()), mapping.LclNum, mapping.Offset); } } #endif @@ -8045,6 +8043,15 @@ void Lowering::FindInducedParameterRegisterLocals() continue; } +#ifdef TARGET_ARM + if ((comp->codeGen->regSet.rsMaskPreSpillRegs(true) & segment.GetRegisterMask()) != 0) + { + // Parameter registers that are prespilled on arm32 are + // currently not supported + continue; + } +#endif + assert(fld->GetLclOffs() <= comp->lvaLclExactSize(fld->GetLclNum())); unsigned structAccessedSize = min(genTypeSize(fld), comp->lvaLclExactSize(fld->GetLclNum()) - fld->GetLclOffs()); From 3903dae7a47ed0776f9f90a6198956985aba2afb Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 26 Dec 2024 22:17:36 +0100 Subject: [PATCH 23/35] Clean up after merge --- src/coreclr/jit/lower.cpp | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index b2fedc3fd2641f..dc44cb18faf0af 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7855,11 +7855,6 @@ PhaseStatus Lowering::DoPhase() LowerBlock(block); } - if (comp->opts.OptimizationEnabled()) - { - MapParameterRegisterLocals(); - } - #ifdef DEBUG JITDUMP("Lower has completed modifying nodes.\n"); if (VERBOSE) @@ -7969,9 +7964,8 @@ void Lowering::MapParameterRegisterLocals() comp->m_paramRegLocalMappings->Emplace(&segment, fieldLclNum, segment.Offset - fieldDsc->lvFldOffset); } - LclVarDsc* fieldLclDsc = comp->lvaGetDesc(fieldLclNum); - assert(!fieldLclDsc->lvIsParamRegTarget); - fieldLclDsc->lvIsParamRegTarget = true; + assert(!fieldDsc->lvIsParamRegTarget); + fieldDsc->lvIsParamRegTarget = true; } } From 2d683e00a0fa7913650e174340c2c8eba02be99a Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 26 Dec 2024 22:19:14 +0100 Subject: [PATCH 24/35] Run jit-format --- src/coreclr/jit/lower.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index dc44cb18faf0af..8a4c488b4b0c22 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7981,7 +7981,8 @@ void Lowering::MapParameterRegisterLocals() for (int i = 0; i < comp->m_paramRegLocalMappings->Height(); i++) { const ParameterRegisterLocalMapping& mapping = comp->m_paramRegLocalMappings->BottomRef(i); - printf(" %s -> V%02u+%u\n", getRegName(mapping.RegisterSegment->GetRegister()), mapping.LclNum, mapping.Offset); + printf(" %s -> V%02u+%u\n", getRegName(mapping.RegisterSegment->GetRegister()), mapping.LclNum, + mapping.Offset); } } #endif From 0db299ff046ab5a7e3cf756ee22d9f17a483dc2d Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 26 Dec 2024 22:34:03 +0100 Subject: [PATCH 25/35] Attach info to reused temps --- src/coreclr/jit/lower.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 8a4c488b4b0c22..45bf3d7dc5ef0c 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -8107,7 +8107,7 @@ void Lowering::FindInducedParameterRegisterLocals() if (remappedLclNum == BAD_VAR_NUM) { remappedLclNum = - comp->lvaGrabTemp(false DEBUGARG(comp->printfAlloc("parameter register V%02u.%s", fld->GetLclNum(), + comp->lvaGrabTemp(false DEBUGARG(comp->printfAlloc("V%02u.%s", fld->GetLclNum(), getRegName(regSegment->GetRegister())))); comp->lvaGetDesc(remappedLclNum)->lvType = fld->TypeGet(); JITDUMP("Created new local V%02u for the mapping\n", remappedLclNum); @@ -8121,6 +8121,15 @@ void Lowering::FindInducedParameterRegisterLocals() // The store will be a no-op, so get rid of it LIR::AsRange(comp->fgFirstBB).Remove(use.User(), true); use = LIR::Use(); + +#ifdef DEBUG + LclVarDsc* remappedLclDsc = comp->lvaGetDesc(remappedLclNum); + remappedLclDsc->lvReason = + comp->printfAlloc("%s%sV%02u.%s", + remappedLclDsc->lvReason == nullptr ? "" : remappedLclDsc->lvReason, + remappedLclDsc->lvReason == nullptr ? "" : " ", + fld->GetLclNum(), getRegName(regSegment->GetRegister())); +#endif } comp->m_paramRegLocalMappings->Emplace(regSegment, remappedLclNum, 0); From 9a23ca359f49664266f6b1fffd7bbfeef340c04a Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 26 Dec 2024 22:37:14 +0100 Subject: [PATCH 26/35] Run jit-format --- src/coreclr/jit/lower.cpp | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 45bf3d7dc5ef0c..163e4e4d22fae2 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -8106,9 +8106,8 @@ void Lowering::FindInducedParameterRegisterLocals() if (remappedLclNum == BAD_VAR_NUM) { - remappedLclNum = - comp->lvaGrabTemp(false DEBUGARG(comp->printfAlloc("V%02u.%s", fld->GetLclNum(), - getRegName(regSegment->GetRegister())))); + remappedLclNum = comp->lvaGrabTemp( + false DEBUGARG(comp->printfAlloc("V%02u.%s", fld->GetLclNum(), getRegName(regSegment->GetRegister())))); comp->lvaGetDesc(remappedLclNum)->lvType = fld->TypeGet(); JITDUMP("Created new local V%02u for the mapping\n", remappedLclNum); } @@ -8125,10 +8124,9 @@ void Lowering::FindInducedParameterRegisterLocals() #ifdef DEBUG LclVarDsc* remappedLclDsc = comp->lvaGetDesc(remappedLclNum); remappedLclDsc->lvReason = - comp->printfAlloc("%s%sV%02u.%s", - remappedLclDsc->lvReason == nullptr ? "" : remappedLclDsc->lvReason, - remappedLclDsc->lvReason == nullptr ? "" : " ", - fld->GetLclNum(), getRegName(regSegment->GetRegister())); + comp->printfAlloc("%s%sV%02u.%s", remappedLclDsc->lvReason == nullptr ? "" : remappedLclDsc->lvReason, + remappedLclDsc->lvReason == nullptr ? "" : " ", fld->GetLclNum(), + getRegName(regSegment->GetRegister())); #endif } From 8a6edafe162e7584e7d98901963b220a87df61d6 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 27 Dec 2024 00:06:33 +0100 Subject: [PATCH 27/35] Add quick early-out that avoids IR walk of first BB --- src/coreclr/jit/lower.cpp | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 163e4e4d22fae2..ec9d8395d3d110 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7995,6 +7995,32 @@ void Lowering::MapParameterRegisterLocals() // void Lowering::FindInducedParameterRegisterLocals() { + // Check if we possibly have any parameters we can induce new register + // locals from. + bool anyCandidates = false; + for (unsigned lclNum = 0; lclNum < comp->info.compArgsCount; lclNum++) + { + LclVarDsc* lcl = comp->lvaGetDesc(lclNum); + if (lcl->lvPromoted || !lcl->lvDoNotEnregister) + { + continue; + } + + const ABIPassingInformation& abiInfo = comp->lvaGetParameterABIInfo(lclNum); + if (!abiInfo.HasAnyRegisterSegment()) + { + continue; + } + + anyCandidates = true; + break; + } + + if (!anyCandidates) + { + return; + } + LocalSet storedToLocals(comp->getAllocator(CMK_ABI)); // Now look for optimization opportunities in the first block: places where // we read fields out of struct parameters that can be mapped cleanly. This From 9b402816a36096460fb67eb8dc8f89bd6109b31c Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 27 Dec 2024 14:17:55 +0100 Subject: [PATCH 28/35] Avoid introducing new locals when it needs a callee save --- src/coreclr/jit/lower.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index ec9d8395d3d110..62d3332ee5d314 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -8021,12 +8021,15 @@ void Lowering::FindInducedParameterRegisterLocals() return; } + bool hasRegisterKill = false; LocalSet storedToLocals(comp->getAllocator(CMK_ABI)); // Now look for optimization opportunities in the first block: places where // we read fields out of struct parameters that can be mapped cleanly. This // is frequently created by physical promotion. for (GenTree* node : LIR::AsRange(comp->fgFirstBB)) { + hasRegisterKill |= node->IsCall(); + GenTreeLclVarCommon* storeLcl; if (node->DefinesLocal(comp, &storeLcl)) { @@ -8132,6 +8135,16 @@ void Lowering::FindInducedParameterRegisterLocals() if (remappedLclNum == BAD_VAR_NUM) { + // If we have seen register kills then avoid creating a new local. + // The local is going to have to move from the parameter register + // into a callee saved register, and the callee saved register will + // need to be saved/restored to/from stack anyway. + if (hasRegisterKill) + { + JITDUMP(" ..but use happens after a call and is deemed unprofitable to create a local for\n"); + continue; + } + remappedLclNum = comp->lvaGrabTemp( false DEBUGARG(comp->printfAlloc("V%02u.%s", fld->GetLclNum(), getRegName(regSegment->GetRegister())))); comp->lvaGetDesc(remappedLclNum)->lvType = fld->TypeGet(); From fc8fa5e87d0413adde021b3a71e6954ad33a225c Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 27 Dec 2024 14:41:57 +0100 Subject: [PATCH 29/35] Run jit-format --- src/coreclr/jit/lower.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 62d3332ee5d314..06da28f4094f15 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -8021,7 +8021,7 @@ void Lowering::FindInducedParameterRegisterLocals() return; } - bool hasRegisterKill = false; + bool hasRegisterKill = false; LocalSet storedToLocals(comp->getAllocator(CMK_ABI)); // Now look for optimization opportunities in the first block: places where // we read fields out of struct parameters that can be mapped cleanly. This From 2f4b176f54128fb23c09a0b797fa79d4c2813c95 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 27 Dec 2024 23:56:55 +0100 Subject: [PATCH 30/35] Fix local var dump output length for frame locs --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/lclvars.cpp | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 72f2a5d4df8485..5cc2e973d86546 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -4141,7 +4141,7 @@ class Compiler #ifdef DEBUG void lvaDumpRegLocation(unsigned lclNum); - void lvaDumpFrameLocation(unsigned lclNum); + void lvaDumpFrameLocation(unsigned lclNum, int minLength); void lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t refCntWtdWidth = 6); void lvaTableDump(FrameLayoutState curState = NO_FRAME_LAYOUT); // NO_FRAME_LAYOUT means use the current frame // layout state defined by lvaDoneFrameLayout diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 344f018d6e17db..4e1cf7b0883a80 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -7343,7 +7343,7 @@ void Compiler::lvaDumpRegLocation(unsigned lclNum) * in its home location. */ -void Compiler::lvaDumpFrameLocation(unsigned lclNum) +void Compiler::lvaDumpFrameLocation(unsigned lclNum, int minLength) { int offset; regNumber baseReg; @@ -7356,7 +7356,11 @@ void Compiler::lvaDumpFrameLocation(unsigned lclNum) baseReg = EBPbased ? REG_FPBASE : REG_SPBASE; #endif - printf("[%2s%1s0x%02X] ", getRegName(baseReg), (offset < 0 ? "-" : "+"), (offset < 0 ? -offset : offset)); + int printed = printf("[%2s%1s0x%02X] ", getRegName(baseReg), (offset < 0 ? "-" : "+"), (offset < 0 ? -offset : offset)); + if ((printed >= 0) && (printed < minLength)) + { + printf("%*s", minLength - printed, ""); + } } /***************************************************************************** @@ -7447,7 +7451,7 @@ void Compiler::lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t r // location. Otherwise, it's always on the stack. if (lvaDoneFrameLayout != NO_FRAME_LAYOUT) { - lvaDumpFrameLocation(lclNum); + lvaDumpFrameLocation(lclNum, (int)strlen("zero-ref ")); } } } From bb7eb9f6f4d14e9bcbd7d38b92fa19208d9c0428 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 20 Dec 2024 15:16:52 +0100 Subject: [PATCH 31/35] JIT: Unify Swift parameter register homing with normal homing The normal homing is general enough now to handle the Swift parameter registers. --- src/coreclr/jit/codegencommon.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index ebf25cfcff19f5..09dc5be94bd676 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -5696,6 +5696,8 @@ void CodeGen::genFnProlog() #ifdef SWIFT_SUPPORT if (compiler->info.compCallConv == CorInfoCallConvExtension::Swift) { + // The error arg is not actually a parameter in the ABI, so no reason to + // consider it to be live if (compiler->lvaSwiftErrorArg != BAD_VAR_NUM) { intRegState.rsCalleeRegArgMaskLiveIn &= ~RBM_SWIFT_ERROR; From 1ceb64cc51a654bf80d6fa1655c54bac24a274c8 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Thu, 26 Dec 2024 19:32:39 +0100 Subject: [PATCH 32/35] Fix store size for Swift CC --- src/coreclr/jit/codegencommon.cpp | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 09dc5be94bd676..90f12c2be88629 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -3211,20 +3211,30 @@ var_types CodeGen::genParamStackType(LclVarDsc* dsc, const ABIPassingSegment& se return layout->GetGCPtrType(seg.Offset / TARGET_POINTER_SIZE); } -#ifdef TARGET_ARM64 + // For the Swift calling convention the enregistered segments do + // not match the memory layout, so we need to use exact store sizes + // for the same reason as RISCV64/LA64 below. + if (compiler->info.compCallConv == CorInfoCallConvExtension::Swift) + { + return seg.GetRegisterType(); + } + +#if defined(TARGET_ARM64) // We round struct sizes up to TYP_I_IMPL on the stack frame so we // can always use the full register size here. This allows us to // use stp more often. return TYP_I_IMPL; -#elif defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64) - // On RISC-V/LoongArch structs passed according to floating-point calling convention are enregistered one +#elif defined(TARGET_XARCH) + // Round up to use smallest possible encoding + return genActualType(seg.GetRegisterType()); +#else + // On other platforms, a safer default is to use the exact size always. For example, for + // RISC-V/LoongArch structs passed according to floating-point calling convention are enregistered one // field per register regardless of the field layout in memory, so the small int load/store instructions // must not be upsized to 4 bytes, otherwise for example: // * struct { struct{} e1,e2,e3; byte b; float f; } -- 4-byte store for 'b' would trash 'f' // * struct { float f; struct{} e1,e2,e3; byte b; } -- 4-byte store for 'b' would trash adjacent stack slot return seg.GetRegisterType(); -#else - return genActualType(seg.GetRegisterType()); #endif } default: From 84101a1d9c53155520595bf9f239155ad0fe26e2 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 28 Dec 2024 00:20:52 +0100 Subject: [PATCH 33/35] Run jit-format --- src/coreclr/jit/lclvars.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index 4e1cf7b0883a80..43361b97585d71 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -7356,7 +7356,8 @@ void Compiler::lvaDumpFrameLocation(unsigned lclNum, int minLength) baseReg = EBPbased ? REG_FPBASE : REG_SPBASE; #endif - int printed = printf("[%2s%1s0x%02X] ", getRegName(baseReg), (offset < 0 ? "-" : "+"), (offset < 0 ? -offset : offset)); + int printed = + printf("[%2s%1s0x%02X] ", getRegName(baseReg), (offset < 0 ? "-" : "+"), (offset < 0 ? -offset : offset)); if ((printed >= 0) && (printed < minLength)) { printf("%*s", minLength - printed, ""); From aa7e6a564dc23837190d5b503516f26d2d995466 Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Sat, 28 Dec 2024 03:05:26 +0100 Subject: [PATCH 34/35] Allow homing prespilled registers on arm32 --- src/coreclr/jit/codegen.h | 2 ++ src/coreclr/jit/codegenarm.cpp | 24 ++++++++++++++++++++++++ src/coreclr/jit/codegencommon.cpp | 22 +++++++++++++++------- src/coreclr/jit/compiler.cpp | 2 +- src/coreclr/jit/lower.cpp | 9 --------- 5 files changed, 42 insertions(+), 17 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index d84ba8760dca00..e11bddd4fb4c20 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -389,6 +389,8 @@ class CodeGen final : public CodeGenInterface void genPopFltRegs(regMaskTP regMask); regMaskTP genStackAllocRegisterMask(unsigned frameSize, regMaskTP maskCalleeSavedFloat); + regMaskTP genPrespilledUnmappedRegs(); + regMaskTP genJmpCallArgMask(); void genFreeLclFrame(unsigned frameSize, diff --git a/src/coreclr/jit/codegenarm.cpp b/src/coreclr/jit/codegenarm.cpp index 80d0e05ba1c12d..246f824fcb0fd4 100644 --- a/src/coreclr/jit/codegenarm.cpp +++ b/src/coreclr/jit/codegenarm.cpp @@ -2103,6 +2103,30 @@ regMaskTP CodeGen::genStackAllocRegisterMask(unsigned frameSize, regMaskTP maskC } } +//----------------------------------------------------------------------------------- +// genPrespilledUnmappedRegs: Get a mask of the registers that are prespilled +// and also not mapped to any locals. +// +// Returns: +// Mask of those registers. These registers can be used safely in prolog as +// they won't be needed after prespilling. +// +regMaskTP CodeGen::genPrespilledUnmappedRegs() +{ + regMaskTP regs = regSet.rsMaskPreSpillRegs(false); + + if (compiler->m_paramRegLocalMappings != nullptr) + { + for (int i = 0; i < compiler->m_paramRegLocalMappings->Height(); i++) + { + const ParameterRegisterLocalMapping& mapping = compiler->m_paramRegLocalMappings->BottomRef(i); + regs &= ~mapping.RegisterSegment->GetRegisterMask(); + } + } + + return regs; +} + //----------------------------------------------------------------------------------- // instGen_MemoryBarrier: Emit a MemoryBarrier instruction // diff --git a/src/coreclr/jit/codegencommon.cpp b/src/coreclr/jit/codegencommon.cpp index 90f12c2be88629..13cead40ebb06f 100644 --- a/src/coreclr/jit/codegencommon.cpp +++ b/src/coreclr/jit/codegencommon.cpp @@ -3425,18 +3425,26 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed) const ParameterRegisterLocalMapping* mapping = compiler->FindParameterRegisterLocalMappingByRegister(segment.GetRegister()); + bool spillToBaseLocal = true; if (mapping != nullptr) { genSpillOrAddRegisterParam(mapping->LclNum, mapping->Offset, lclNum, segment, &graph); - // If home is not shared with base local, then also spill to - // the base local. - if (!lclDsc->lvPromoted) + // If home is shared with base local, then skip spilling to the + // base local. + if (lclDsc->lvPromoted) { - genSpillOrAddRegisterParam(lclNum, segment.Offset, lclNum, segment, &graph); + spillToBaseLocal = false; } } - else + +#ifdef TARGET_ARM + // For arm32 the spills to the base local happen as part of + // prespilling sometimes, so skip it in that case. + spillToBaseLocal &= (regSet.rsMaskPreSpillRegs(false) & segment.GetRegisterMask()) == 0; +#endif + + if (spillToBaseLocal) { genSpillOrAddRegisterParam(lclNum, segment.Offset, lclNum, segment, &graph); } @@ -3921,7 +3929,7 @@ void CodeGen::genCheckUseBlockInit() // must force spill R4/R5/R6 so that we can use them during // zero-initialization process. // - int forceSpillRegCount = genCountBits(maskCalleeRegArgMask & ~regSet.rsMaskPreSpillRegs(false)) - 1; + int forceSpillRegCount = genCountBits(maskCalleeRegArgMask & ~genPrespilledUnmappedRegs()) - 1; if (forceSpillRegCount > 0) regSet.rsSetRegsModified(RBM_R4); if (forceSpillRegCount > 1) @@ -5332,7 +5340,7 @@ void CodeGen::genFnProlog() // These registers will be available to use for the initReg. We just remove // all of these registers from the rsCalleeRegArgMaskLiveIn. // - intRegState.rsCalleeRegArgMaskLiveIn &= ~regSet.rsMaskPreSpillRegs(false); + intRegState.rsCalleeRegArgMaskLiveIn &= ~genPrespilledUnmappedRegs(); #endif /* Choose the register to use for zero initialization */ diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 901c4039b5dff8..2b72c1704ae2c0 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -10447,7 +10447,7 @@ JITDBGAPI void __cdecl dVN(ValueNum vn) cVN(JitTls::GetCompiler(), vn); } -JITDBGAPI void __cdecl dRegMask(regMaskTP mask) +JITDBGAPI void __cdecl dRegMask(const regMaskTP& mask) { static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called printf("===================================================================== dRegMask %u\n", sequenceNumber++); diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 06da28f4094f15..aa7b5ea67bc09b 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -8083,15 +8083,6 @@ void Lowering::FindInducedParameterRegisterLocals() continue; } -#ifdef TARGET_ARM - if ((comp->codeGen->regSet.rsMaskPreSpillRegs(true) & segment.GetRegisterMask()) != 0) - { - // Parameter registers that are prespilled on arm32 are - // currently not supported - continue; - } -#endif - assert(fld->GetLclOffs() <= comp->lvaLclExactSize(fld->GetLclNum())); unsigned structAccessedSize = min(genTypeSize(fld), comp->lvaLclExactSize(fld->GetLclNum()) - fld->GetLclOffs()); From 62289d75880f95aa56e34c91186f8ac14e8c3d3e Mon Sep 17 00:00:00 2001 From: Jakob Botsch Nielsen Date: Fri, 10 Jan 2025 13:28:33 +0100 Subject: [PATCH 35/35] Skip induced parameter register locals for profiler hook on arm32 --- src/coreclr/jit/lower.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index aa7b5ea67bc09b..dd627025facef7 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -7995,6 +7995,16 @@ void Lowering::MapParameterRegisterLocals() // void Lowering::FindInducedParameterRegisterLocals() { +#ifdef TARGET_ARM + // On arm32 the profiler hook does not preserve arg registers, so + // parameters are prespilled and cannot stay enregistered. + if (comp->compIsProfilerHookNeeded()) + { + JITDUMP("Skipping FindInducedParameterRegisterLocals on arm32 with profiler hook\n"); + return; + } +#endif + // Check if we possibly have any parameters we can induce new register // locals from. bool anyCandidates = false;