Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RISC-V] Simplify stack probing on riscv64 #112697

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -425,10 +425,6 @@ class CodeGen final : public CodeGenInterface
void genOSRSaveRemainingCalleeSavedRegisters();
#endif // TARGET_AMD64

#if defined(TARGET_RISCV64)
void genStackProbe(ssize_t frameSize, regNumber rOffset, regNumber rLimit, regNumber rPageSize);
#endif

void genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn);

void genPoisonFrame(regMaskTP bbRegLiveIn);
Expand Down
227 changes: 21 additions & 206 deletions src/coreclr/jit/codegenriscv64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1635,7 +1635,7 @@ void CodeGen::genLclHeap(GenTree* tree)
// The SP might already be in the guard page, so we must touch it BEFORE
// the alloc, not after.

// ld_w r0, 0(SP)
// tickle the page - this triggers a page fault when on the guard page
emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, REG_SP, 0);

lastTouchDelta = amount;
Expand Down Expand Up @@ -1680,8 +1680,7 @@ void CodeGen::genLclHeap(GenTree* tree)
// and localloc size is a multiple of STACK_ALIGN.

// Loop:
ssize_t imm = -16;
emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, imm);
emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, -16);

emit->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, REG_SPBASE, 8);
emit->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, REG_SPBASE, 0);
Expand All @@ -1693,8 +1692,8 @@ void CodeGen::genLclHeap(GenTree* tree)

emit->emitIns_R_R_I(INS_addi, emitActualTypeSize(type), regCnt, regCnt, -16);

assert(imm == (-4 << 2)); // goto loop.
emit->emitIns_R_R_I(INS_bne, EA_PTRSIZE, regCnt, REG_R0, (-4 << 2));
// goto Loop
emit->emitIns_R_R_I(INS_bne, EA_PTRSIZE, regCnt, REG_R0, -4 << 2);

lastTouchDelta = 0;
}
Expand All @@ -1708,7 +1707,6 @@ void CodeGen::genLclHeap(GenTree* tree)
// case SP is on the last byte of the guard page. Thus you must
// touch SP-0 first not SP-0x1000.
//
// This is similar to the prolog code in CodeGen::genAllocLclFrame().
//
// Note that we go through a few hoops so that SP never points to
// illegal pages at any time during the tickling process.
Expand All @@ -1719,23 +1717,20 @@ void CodeGen::genLclHeap(GenTree* tree)
// addi regCnt, REG_R0, 0
//
// Skip:
// lui regTmp, eeGetPageSize()>>12
// lui regPageSize, eeGetPageSize()>>12
// addi regTmp, SP, 0
// Loop:
// lw r0, 0(SP) // tickle the page - read from the page
// sub RA, SP, regTmp // decrement SP by eeGetPageSize()
// bltu RA, regCnt, Done
// sub SP, SP,regTmp
// j Loop
// lw r0, 0(regTmp) // tickle the page - read from the page
// sub regTmp, regTmp, regPageSize
// bgeu regTmp, regCnt, Loop
//
// Done:
// mov SP, regCnt
// addi SP, regCnt, 0
//

if (tempReg == REG_NA)
tempReg = internalRegisters.Extract(tree);

regNumber rPageSize = internalRegisters.GetSingle(tree);

assert(regCnt != tempReg);
emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, tempReg, REG_SPBASE, regCnt);

Expand All @@ -1746,35 +1741,24 @@ void CodeGen::genLclHeap(GenTree* tree)
emit->emitIns_R_R_I(INS_beq, EA_PTRSIZE, tempReg, REG_R0, 2 << 2);
emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, regCnt, REG_R0, 0);

emit->emitIns_R_I(INS_lui, EA_PTRSIZE, rPageSize, pageSize >> 12);

// genDefineTempLabel(loop);

// tickle the page - Read from the updated SP - this triggers a page fault when on the guard page
emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, REG_SPBASE, 0);

// decrement SP by eeGetPageSize()
emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, tempReg, REG_SPBASE, rPageSize);

assert(rPageSize != tempReg);

ssize_t imm = 3 << 2; // goto done.
emit->emitIns_R_R_I(INS_bltu, EA_PTRSIZE, tempReg, regCnt, imm);
regNumber rPageSize = internalRegisters.GetSingle(tree);

emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, rPageSize);
noway_assert(rPageSize != tempReg);

imm = -4 << 2;
// Jump to loop and tickle new stack address
emit->emitIns_I(INS_j, EA_PTRSIZE, imm);
emit->emitIns_R_I(INS_lui, EA_PTRSIZE, rPageSize, pageSize >> 12);
regSet.verifyRegUsed(rPageSize);
emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, tempReg, REG_SPBASE, 0);

// Done with stack tickle loop
// genDefineTempLabel(done);
// tickle the page - this triggers a page fault when on the guard page
emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, tempReg, 0);
emit->emitIns_R_R_R(INS_sub, EA_4BYTE, tempReg, tempReg, rPageSize);

// Now just move the final value to SP
emit->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_SPBASE, regCnt, 0);
emit->emitIns_R_R_I(INS_bgeu, EA_PTRSIZE, tempReg, regCnt, -2 << 2);

// lastTouchDelta is dynamic, and can be up to a page. So if we have outgoing arg space,
// we're going to assume the worst and probe.
// Move the final value to SP
emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, regCnt, 0);
}

ALLOC_DONE:
Expand Down Expand Up @@ -6676,175 +6660,6 @@ void CodeGen::genEstablishFramePointer(int delta, bool reportUnwindData)
};
}

//------------------------------------------------------------------------
// genStackProbe: Probe the stack without changing it
//
// Notes:
// This function is using loop to probe each memory page.
//
// Arguments:
// frameSize - total frame size
// rOffset - usually initial register number
// rLimit - an extra register for comparison
// rPageSize - register for storing page size
//
void CodeGen::genStackProbe(ssize_t frameSize, regNumber rOffset, regNumber rLimit, regNumber rPageSize)
{
// make sure frameSize safely fits within 4 bytes
noway_assert((ssize_t)(int)frameSize == (ssize_t)frameSize);

const target_size_t pageSize = compiler->eeGetPageSize();

// According to RISC-V Privileged ISA page size should be equal 4KiB
noway_assert(pageSize == 0x1000);

emitter* emit = GetEmitter();

emit->emitLoadImmediate(EA_PTRSIZE, rLimit, -frameSize);
regSet.verifyRegUsed(rLimit);

emit->emitIns_R_R_R(INS_add, EA_PTRSIZE, rLimit, rLimit, REG_SPBASE);

emit->emitIns_R_I(INS_lui, EA_PTRSIZE, rPageSize, pageSize >> 12);
regSet.verifyRegUsed(rPageSize);

emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, rOffset, REG_SPBASE, rPageSize);

// Loop:
// tickle the page - Read from the updated SP - this triggers a page fault when on the guard page
emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, rOffset, 0);
emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, rOffset, rOffset, rPageSize);

// each instr is 4 bytes
// if (rOffset >= rLimit) goto Loop;
emit->emitIns_R_R_I(INS_bge, EA_PTRSIZE, rOffset, rLimit, -2 << 2);
}

//------------------------------------------------------------------------
// genAllocLclFrame: Probe the stack.
//
// Notes:
// This only does the probing; allocating the frame is done when callee-saved registers are saved.
// This is done before anything has been pushed. The previous frame might have a large outgoing argument
// space that has been allocated, but the lowest addresses have not been touched. Our frame setup might
// not touch up to the first 504 bytes. This means we could miss a guard page. On Windows, however,
// there are always three guard pages, so we will not miss them all. On Linux, there is only one guard
// page by default, so we need to be more careful. We do an extra probe if we might not have probed
// recently enough. That is, if a call and prolog establishment might lead to missing a page. We do this
// on Windows as well just to be consistent, even though it should not be necessary.
//
// Arguments:
// frameSize - the size of the stack frame being allocated.
// initReg - register to use as a scratch register.
// pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'false' if and only if
// this call sets 'initReg' to a non-zero value. Otherwise, it is unchanged.
// maskArgRegsLiveIn - incoming argument registers that are currently live.
//
// Return value:
// None
//
void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn)
{
assert(compiler->compGeneratingProlog);

if (frameSize == 0)
{
return;
}

// According to RISC-V Privileged ISA page size should be equal 4KiB
const target_size_t pageSize = compiler->eeGetPageSize();

assert(!compiler->info.compPublishStubParam || (REG_SECRET_STUB_PARAM != initReg));

target_size_t lastTouchDelta = 0;

emitter* emit = GetEmitter();

// Emit the following sequence to 'tickle' the pages.
// Note it is important that stack pointer not change until this is complete since the tickles
// could cause a stack overflow, and we need to be able to crawl the stack afterward
// (which means the stack pointer needs to be known).

if (frameSize < pageSize)
{
// no probe needed
lastTouchDelta = frameSize;
}
else if (frameSize < 3 * pageSize)
{
// between 1 and 3 pages we will probe each page without a loop,
// because it is faster that way and doesn't cost us much
lastTouchDelta = frameSize;

for (target_size_t probeOffset = pageSize; probeOffset <= frameSize; probeOffset += pageSize)
{
emit->emitIns_R_I(INS_lui, EA_PTRSIZE, initReg, probeOffset >> 12);
regSet.verifyRegUsed(initReg);

emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, initReg, REG_SPBASE, initReg);
emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, initReg, 0);

lastTouchDelta -= pageSize;
}

assert(pInitRegZeroed != nullptr);
*pInitRegZeroed = false; // The initReg does not contain zero

assert(lastTouchDelta == frameSize % pageSize);
compiler->unwindPadding();
}
else
{
// probe each page, that we need to allocate large stack frame
assert(frameSize >= 3 * pageSize);

regMaskTP availMask = RBM_ALLINT & (regSet.rsGetModifiedRegsMask() | ~RBM_INT_CALLEE_SAVED);
availMask &= ~maskArgRegsLiveIn; // Remove all of the incoming argument registers
// as they are currently live
availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg

noway_assert(availMask != RBM_NONE);

regMaskTP regMask = genFindLowestBit(availMask);
regNumber rLimit = genRegNumFromMask(regMask);

availMask &= ~regMask; // Remove rLimit register

noway_assert(availMask != RBM_NONE);

regMask = genFindLowestBit(availMask);
regNumber rPageSize = genRegNumFromMask(regMask);

genStackProbe((ssize_t)frameSize, initReg, rLimit, rPageSize);

assert(pInitRegZeroed != nullptr);
*pInitRegZeroed = false; // The initReg does not contain zero

lastTouchDelta = frameSize % pageSize;
compiler->unwindPadding();
}

#if STACK_PROBE_BOUNDARY_THRESHOLD_BYTES != 0
// if the last page was too far, we will make an extra probe at the bottom
if (lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > pageSize)
{
assert(lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES < pageSize << 1);

emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, initReg, REG_R0, frameSize);
regSet.verifyRegUsed(initReg);

emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, initReg, REG_SPBASE, initReg);
emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, initReg, 0);

assert(pInitRegZeroed != nullptr);
*pInitRegZeroed = false; // The initReg does not contain zero

compiler->unwindPadding();
}
#endif
}

void CodeGen::genJumpToThrowHlpBlk_la(
SpecialCodeKind codeKind, instruction ins, regNumber reg1, BasicBlock* failBlk, regNumber reg2)
{
Expand Down
25 changes: 0 additions & 25 deletions src/coreclr/jit/emitriscv64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3351,31 +3351,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
if (vt == TYP_REF || vt == TYP_BYREF)
emitGCvarDeadUpd(adr + ofs, dst2 DEBUG_ARG(varNum));
}
// if (emitInsWritesToLclVarStackLocPair(id))
//{
// unsigned ofs2 = ofs + TARGET_POINTER_SIZE;
// if (id->idGCrefReg2() != GCT_NONE)
// {
// emitGCvarLiveUpd(adr + ofs2, varNum, id->idGCrefReg2(), *dp);
// }
// else
// {
// // If the type of the local is a gc ref type, update the liveness.
// var_types vt;
// if (varNum >= 0)
// {
// // "Regular" (non-spill-temp) local.
// vt = var_types(emitComp->lvaTable[varNum].lvType);
// }
// else
// {
// TempDsc* tmpDsc = codeGen->regSet.tmpFindNum(varNum);
// vt = tmpDsc->tdTempType();
// }
// if (vt == TYP_REF || vt == TYP_BYREF)
// emitGCvarDeadUpd(adr + ofs2, *dp);
// }
//}
}

#ifdef DEBUG
Expand Down
Loading