Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JIT: Optimize struct parameter register accesses in the backend #110819

Draft
wants to merge 36 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
794536f
Foo
jakobbotsch Dec 17, 2024
db15f94
Remove optimization for now
jakobbotsch Dec 17, 2024
8e8c913
Add an lvTracked check, remove dead code
jakobbotsch Dec 17, 2024
e504b4b
Support insertions for now
jakobbotsch Dec 17, 2024
bf196bd
Run jit-format
jakobbotsch Dec 17, 2024
559baf0
Avoid homing Swift parameters if they are !lvOnFrame
jakobbotsch Dec 18, 2024
6d1b57e
Rename
jakobbotsch Dec 18, 2024
058e8dc
JIT: Optimize struct parameter register accesses in the backend
jakobbotsch Dec 18, 2024
5f626ab
Parameters in OSR functions cannot be mapped
jakobbotsch Dec 18, 2024
1283951
Fix build on platforms with implicit byrefs, run jit-format
jakobbotsch Dec 18, 2024
0792ab5
Spill to both parameter and new mappings
jakobbotsch Dec 19, 2024
3279c71
Fix arm32 build
jakobbotsch Dec 19, 2024
2b8cf74
Induce mappings before lowering runs
jakobbotsch Dec 19, 2024
22b755d
Avoid double lowering, always back to nop
jakobbotsch Dec 19, 2024
43f6ba3
Add check for DNER when reusing local
jakobbotsch Dec 19, 2024
1d3aba6
Add function header
jakobbotsch Dec 19, 2024
6f21d49
Ensure we check both the parameter and mapped target when determining…
jakobbotsch Dec 19, 2024
4f89b1a
Change a lcl description
jakobbotsch Dec 19, 2024
e47919d
Skip promoted locals for optimization
jakobbotsch Dec 19, 2024
cfff900
Unify Swift parameter register homing
jakobbotsch Dec 20, 2024
f0e6d2d
Run jit-format
jakobbotsch Dec 20, 2024
4f06d3c
Skip optimization for arm32 prespilled registers
jakobbotsch Dec 20, 2024
1c9bd2c
Merge branch 'main' of github.com:dotnet/runtime into physical-promot…
jakobbotsch Dec 26, 2024
3903dae
Clean up after merge
jakobbotsch Dec 26, 2024
2d683e0
Run jit-format
jakobbotsch Dec 26, 2024
0db299f
Attach info to reused temps
jakobbotsch Dec 26, 2024
9a23ca3
Run jit-format
jakobbotsch Dec 26, 2024
8a6edaf
Add quick early-out that avoids IR walk of first BB
jakobbotsch Dec 26, 2024
9b40281
Avoid introducing new locals when it needs a callee save
jakobbotsch Dec 27, 2024
fc8fa5e
Run jit-format
jakobbotsch Dec 27, 2024
2f4b176
Fix local var dump output length for frame locs
jakobbotsch Dec 27, 2024
bb7eb9f
JIT: Unify Swift parameter register homing with normal homing
jakobbotsch Dec 20, 2024
1ceb64c
Fix store size for Swift CC
jakobbotsch Dec 26, 2024
84101a1
Run jit-format
jakobbotsch Dec 27, 2024
aa7e6a5
Allow homing prespilled registers on arm32
jakobbotsch Dec 28, 2024
62289d7
Skip induced parameter register locals for profiler hook on arm32
jakobbotsch Jan 10, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ class CodeGen final : public CodeGenInterface
#endif

void genHomeStackSegment(unsigned lclNum, const ABIPassingSegment& seg, regNumber initReg, bool* pInitRegZeroed);
void genHomeSwiftStructParameters(bool handleStack);
void genHomeSwiftStructStackParameters();
void genHomeStackPartOfSplitParameter(regNumber initReg, bool* initRegStillZeroed);

void genCheckUseBlockInit();
Expand Down Expand Up @@ -389,6 +389,8 @@ class CodeGen final : public CodeGenInterface
void genPopFltRegs(regMaskTP regMask);
regMaskTP genStackAllocRegisterMask(unsigned frameSize, regMaskTP maskCalleeSavedFloat);

regMaskTP genPrespilledUnmappedRegs();

regMaskTP genJmpCallArgMask();

void genFreeLclFrame(unsigned frameSize,
Expand Down
24 changes: 24 additions & 0 deletions src/coreclr/jit/codegenarm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2103,6 +2103,30 @@ regMaskTP CodeGen::genStackAllocRegisterMask(unsigned frameSize, regMaskTP maskC
}
}

//-----------------------------------------------------------------------------------
// genPrespilledUnmappedRegs: Compute the set of prespilled registers that are
// not the source of any parameter register to local mapping.
//
// Returns:
//   Mask of the prespilled registers with every mapped register removed.
//   These registers can be used safely in the prolog as they won't be needed
//   after prespilling.
//
regMaskTP CodeGen::genPrespilledUnmappedRegs()
{
    // Start from every prespilled register and strip out the mapped ones.
    regMaskTP unmappedRegs = regSet.rsMaskPreSpillRegs(false);

    if (compiler->m_paramRegLocalMappings == nullptr)
    {
        // No mappings were recorded, so there is nothing to remove.
        return unmappedRegs;
    }

    int index = 0;
    while (index < compiler->m_paramRegLocalMappings->Height())
    {
        const ParameterRegisterLocalMapping& mapping = compiler->m_paramRegLocalMappings->BottomRef(index);

        regMaskTP mappedRegs = mapping.RegisterSegment->GetRegisterMask();
        unmappedRegs &= ~mappedRegs;

        index++;
    }

    return unmappedRegs;
}

//-----------------------------------------------------------------------------------
// instGen_MemoryBarrier: Emit a MemoryBarrier instruction
//
Expand Down
110 changes: 52 additions & 58 deletions src/coreclr/jit/codegencommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3211,20 +3211,30 @@ var_types CodeGen::genParamStackType(LclVarDsc* dsc, const ABIPassingSegment& se
return layout->GetGCPtrType(seg.Offset / TARGET_POINTER_SIZE);
}

#ifdef TARGET_ARM64
// For the Swift calling convention the enregistered segments do
// not match the memory layout, so we need to use exact store sizes
// for the same reason as RISCV64/LA64 below.
if (compiler->info.compCallConv == CorInfoCallConvExtension::Swift)
{
return seg.GetRegisterType();
}

#if defined(TARGET_ARM64)
// We round struct sizes up to TYP_I_IMPL on the stack frame so we
// can always use the full register size here. This allows us to
// use stp more often.
return TYP_I_IMPL;
#elif defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64)
// On RISC-V/LoongArch structs passed according to floating-point calling convention are enregistered one
#elif defined(TARGET_XARCH)
// Round up to use smallest possible encoding
return genActualType(seg.GetRegisterType());
#else
// On other platforms, a safer default is to use the exact size always. For example, for
// RISC-V/LoongArch structs passed according to floating-point calling convention are enregistered one
// field per register regardless of the field layout in memory, so the small int load/store instructions
// must not be upsized to 4 bytes, otherwise for example:
// * struct { struct{} e1,e2,e3; byte b; float f; } -- 4-byte store for 'b' would trash 'f'
// * struct { float f; struct{} e1,e2,e3; byte b; } -- 4-byte store for 'b' would trash adjacent stack slot
return seg.GetRegisterType();
#else
return genActualType(seg.GetRegisterType());
#endif
}
default:
Expand Down Expand Up @@ -3394,6 +3404,12 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed)
// top of the underlying registers.
RegGraph graph(compiler);

// Add everything to the graph, or spill directly to stack when needed.
// Note that some registers may be homed in multiple (stack) places.
// Particularly if there is a mapping to a local that does not share its
// (stack) home with the parameter local, in which case we will home it
// both into the parameter local's stack home (if it is used), but also to
// the mapping target.
for (unsigned lclNum = 0; lclNum < compiler->info.compArgsCount; lclNum++)
{
LclVarDsc* lclDsc = compiler->lvaGetDesc(lclNum);
Expand All @@ -3409,11 +3425,26 @@ void CodeGen::genHomeRegisterParams(regNumber initReg, bool* initRegStillZeroed)
const ParameterRegisterLocalMapping* mapping =
compiler->FindParameterRegisterLocalMappingByRegister(segment.GetRegister());

bool spillToBaseLocal = true;
if (mapping != nullptr)
{
genSpillOrAddRegisterParam(mapping->LclNum, mapping->Offset, lclNum, segment, &graph);

// If home is shared with base local, then skip spilling to the
// base local.
if (lclDsc->lvPromoted)
{
spillToBaseLocal = false;
}
}
else

#ifdef TARGET_ARM
// For arm32 the spills to the base local happen as part of
// prespilling sometimes, so skip it in that case.
spillToBaseLocal &= (regSet.rsMaskPreSpillRegs(false) & segment.GetRegisterMask()) == 0;
#endif

if (spillToBaseLocal)
{
genSpillOrAddRegisterParam(lclNum, segment.Offset, lclNum, segment, &graph);
}
Expand Down Expand Up @@ -3898,7 +3929,7 @@ void CodeGen::genCheckUseBlockInit()
// must force spill R4/R5/R6 so that we can use them during
// zero-initialization process.
//
int forceSpillRegCount = genCountBits(maskCalleeRegArgMask & ~regSet.rsMaskPreSpillRegs(false)) - 1;
int forceSpillRegCount = genCountBits(maskCalleeRegArgMask & ~genPrespilledUnmappedRegs()) - 1;
if (forceSpillRegCount > 0)
regSet.rsSetRegsModified(RBM_R4);
if (forceSpillRegCount > 1)
Expand Down Expand Up @@ -4330,7 +4361,7 @@ void CodeGen::genEnregisterOSRArgsAndLocals()

#if defined(SWIFT_SUPPORT) || defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64)
//-----------------------------------------------------------------------------
// genHomeSwiftStructParameters: Move the incoming segment to the local stack frame.
// genHomeStackSegment: Move the incoming stack segment to the local stack frame.
//
// Arguments:
// lclNum - Number of local variable to home
Expand Down Expand Up @@ -4393,14 +4424,11 @@ void CodeGen::genHomeStackSegment(unsigned lclNum,
#ifdef SWIFT_SUPPORT

//-----------------------------------------------------------------------------
// genHomeSwiftStructParameters:
// Reassemble Swift struct parameters if necessary.
//
// Arguments:
// handleStack - If true, reassemble the segments that were passed on the stack.
// If false, reassemble the segments that were passed in registers.
// genHomeSwiftStructStackParameters:
// Reassemble Swift struct parameters from the segments that were passed on
// stack.
//
void CodeGen::genHomeSwiftStructParameters(bool handleStack)
void CodeGen::genHomeSwiftStructStackParameters()
{
for (unsigned lclNum = 0; lclNum < compiler->info.compArgsCount; lclNum++)
{
Expand All @@ -4415,33 +4443,13 @@ void CodeGen::genHomeSwiftStructParameters(bool handleStack)
continue;
}

JITDUMP("Homing Swift parameter V%02u: ", lclNum);
JITDUMP("Homing Swift parameter stack segments for V%02u: ", lclNum);
const ABIPassingInformation& abiInfo = compiler->lvaGetParameterABIInfo(lclNum);
DBEXEC(VERBOSE, abiInfo.Dump());

for (const ABIPassingSegment& seg : abiInfo.Segments())
{
if (seg.IsPassedOnStack() != handleStack)
{
continue;
}

if (seg.IsPassedInRegister())
{
RegState* regState = genIsValidFloatReg(seg.GetRegister()) ? &floatRegState : &intRegState;
regMaskTP regs = seg.GetRegisterMask();

if ((regState->rsCalleeRegArgMaskLiveIn & regs) != RBM_NONE)
{
var_types storeType = seg.GetRegisterType();
assert(storeType != TYP_UNDEF);
GetEmitter()->emitIns_S_R(ins_Store(storeType), emitTypeSize(storeType), seg.GetRegister(), lclNum,
seg.Offset);

regState->rsCalleeRegArgMaskLiveIn &= ~regs;
}
}
else
if (seg.IsPassedOnStack())
{
// We can use REG_SCRATCH as a temporary register here as we ensured that during LSRA build.
genHomeStackSegment(lclNum, seg, REG_SCRATCH, nullptr);
Expand Down Expand Up @@ -5332,7 +5340,7 @@ void CodeGen::genFnProlog()
// These registers will be available to use for the initReg. We just remove
// all of these registers from the rsCalleeRegArgMaskLiveIn.
//
intRegState.rsCalleeRegArgMaskLiveIn &= ~regSet.rsMaskPreSpillRegs(false);
intRegState.rsCalleeRegArgMaskLiveIn &= ~genPrespilledUnmappedRegs();
#endif

/* Choose the register to use for zero initialization */
Expand Down Expand Up @@ -5706,30 +5714,12 @@ void CodeGen::genFnProlog()
#ifdef SWIFT_SUPPORT
if (compiler->info.compCallConv == CorInfoCallConvExtension::Swift)
{
if ((compiler->lvaSwiftSelfArg != BAD_VAR_NUM) &&
((intRegState.rsCalleeRegArgMaskLiveIn & RBM_SWIFT_SELF) != 0) &&
compiler->lvaGetDesc(compiler->lvaSwiftSelfArg)->lvOnFrame)
{
GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE, REG_SWIFT_SELF, compiler->lvaSwiftSelfArg, 0);
intRegState.rsCalleeRegArgMaskLiveIn &= ~RBM_SWIFT_SELF;
}

if ((compiler->lvaSwiftIndirectResultArg != BAD_VAR_NUM) &&
((intRegState.rsCalleeRegArgMaskLiveIn & theFixedRetBuffMask(CorInfoCallConvExtension::Swift)) != 0) &&
compiler->lvaGetDesc(compiler->lvaSwiftIndirectResultArg)->lvOnFrame)
{
GetEmitter()->emitIns_S_R(ins_Store(TYP_I_IMPL), EA_PTRSIZE,
theFixedRetBuffReg(CorInfoCallConvExtension::Swift),
compiler->lvaSwiftIndirectResultArg, 0);
intRegState.rsCalleeRegArgMaskLiveIn &= ~theFixedRetBuffMask(CorInfoCallConvExtension::Swift);
}

// The error arg is not actually a parameter in the ABI, so no reason to
// consider it to be live
if (compiler->lvaSwiftErrorArg != BAD_VAR_NUM)
{
intRegState.rsCalleeRegArgMaskLiveIn &= ~RBM_SWIFT_ERROR;
}

genHomeSwiftStructParameters(/* handleStack */ false);
}
#endif

Expand All @@ -5750,6 +5740,10 @@ void CodeGen::genFnProlog()
#else
genEnregisterOSRArgsAndLocals();
#endif
// OSR functions take no parameters in registers. Ensure no mappings
// are present.
// assert((compiler->m_paramRegLocalMappings == nullptr) || compiler->m_paramRegLocalMappings->Empty());

compiler->lvaUpdateArgsWithInitialReg();
}
else
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/codegenlinear.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,7 @@ void CodeGen::genCodeForBBlist()
// codegen related to doing this, so it cannot be done in the prolog.
if (block->IsFirst() && compiler->lvaHasAnySwiftStackParamToReassemble())
{
genHomeSwiftStructParameters(/* handleStack */ true);
genHomeSwiftStructStackParameters();
}
#endif

Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10447,7 +10447,7 @@ JITDBGAPI void __cdecl dVN(ValueNum vn)
cVN(JitTls::GetCompiler(), vn);
}

JITDBGAPI void __cdecl dRegMask(regMaskTP mask)
JITDBGAPI void __cdecl dRegMask(const regMaskTP& mask)
{
static unsigned sequenceNumber = 0; // separate calls with a number to indicate this function has been called
printf("===================================================================== dRegMask %u\n", sequenceNumber++);
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -4141,7 +4141,7 @@ class Compiler

#ifdef DEBUG
void lvaDumpRegLocation(unsigned lclNum);
void lvaDumpFrameLocation(unsigned lclNum);
void lvaDumpFrameLocation(unsigned lclNum, int minLength);
void lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t refCntWtdWidth = 6);
void lvaTableDump(FrameLayoutState curState = NO_FRAME_LAYOUT); // NO_FRAME_LAYOUT means use the current frame
// layout state defined by lvaDoneFrameLayout
Expand Down
5 changes: 3 additions & 2 deletions src/coreclr/jit/compiler.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4247,8 +4247,9 @@ inline void Compiler::CLR_API_Leave(API_ICorJitInfo_Names ename)
bool Compiler::fgVarIsNeverZeroInitializedInProlog(unsigned varNum)
{
LclVarDsc* varDsc = lvaGetDesc(varNum);
bool result = varDsc->lvIsParam || lvaIsOSRLocal(varNum) || (varNum == lvaGSSecurityCookie) ||
(varNum == lvaInlinedPInvokeFrameVar) || (varNum == lvaStubArgumentVar) || (varNum == lvaRetAddrVar);
bool result = varDsc->lvIsParam || varDsc->lvIsParamRegTarget || lvaIsOSRLocal(varNum) ||
(varNum == lvaGSSecurityCookie) || (varNum == lvaInlinedPInvokeFrameVar) ||
(varNum == lvaStubArgumentVar) || (varNum == lvaRetAddrVar);

#ifdef TARGET_ARM64
result = result || (varNum == lvaFfrRegister);
Expand Down
20 changes: 15 additions & 5 deletions src/coreclr/jit/lclvars.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4938,8 +4938,8 @@ void Compiler::lvaComputeRefCounts(bool isRecompute, bool setSlotNumbers)
// that was set by past phases.
if (!isRecompute)
{
varDsc->lvSingleDef = varDsc->lvIsParam;
varDsc->lvSingleDefRegCandidate = varDsc->lvIsParam;
varDsc->lvSingleDef = varDsc->lvIsParam || varDsc->lvIsParamRegTarget;
varDsc->lvSingleDefRegCandidate = varDsc->lvIsParam || varDsc->lvIsParamRegTarget;

varDsc->lvAllDefsAreNoGc = (varDsc->lvImplicitlyReferenced == false);
}
Expand Down Expand Up @@ -5032,6 +5032,11 @@ void Compiler::lvaComputeRefCounts(bool isRecompute, bool setSlotNumbers)
varDsc->incRefCnts(BB_UNITY_WEIGHT, this);
}
}
else if (varDsc->lvIsParamRegTarget && (varDsc->lvRefCnt() > 0))
{
varDsc->incRefCnts(BB_UNITY_WEIGHT, this);
varDsc->incRefCnts(BB_UNITY_WEIGHT, this);
}

// If we have JMP, all arguments must have a location
// even if we don't use them inside the method
Expand Down Expand Up @@ -7338,7 +7343,7 @@ void Compiler::lvaDumpRegLocation(unsigned lclNum)
* in its home location.
*/

void Compiler::lvaDumpFrameLocation(unsigned lclNum)
void Compiler::lvaDumpFrameLocation(unsigned lclNum, int minLength)
{
int offset;
regNumber baseReg;
Expand All @@ -7351,7 +7356,12 @@ void Compiler::lvaDumpFrameLocation(unsigned lclNum)
baseReg = EBPbased ? REG_FPBASE : REG_SPBASE;
#endif

printf("[%2s%1s0x%02X] ", getRegName(baseReg), (offset < 0 ? "-" : "+"), (offset < 0 ? -offset : offset));
int printed =
printf("[%2s%1s0x%02X] ", getRegName(baseReg), (offset < 0 ? "-" : "+"), (offset < 0 ? -offset : offset));
if ((printed >= 0) && (printed < minLength))
{
printf("%*s", minLength - printed, "");
}
}

/*****************************************************************************
Expand Down Expand Up @@ -7442,7 +7452,7 @@ void Compiler::lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t r
// location. Otherwise, it's always on the stack.
if (lvaDoneFrameLayout != NO_FRAME_LAYOUT)
{
lvaDumpFrameLocation(lclNum);
lvaDumpFrameLocation(lclNum, (int)strlen("zero-ref "));
}
}
}
Expand Down
Loading
Loading