Skip to content

Commit

Permalink
simplify stack probing on rv64
Browse files Browse the repository at this point in the history
  • Loading branch information
sirntar committed Feb 19, 2025
1 parent 995b6de commit 7d6e19a
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 235 deletions.
6 changes: 2 additions & 4 deletions src/coreclr/jit/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -425,11 +425,9 @@ class CodeGen final : public CodeGenInterface
void genOSRSaveRemainingCalleeSavedRegisters();
#endif // TARGET_AMD64

#if defined(TARGET_RISCV64)
void genStackProbe(ssize_t frameSize, regNumber rOffset, regNumber rLimit, regNumber rPageSize);
#endif

#if !defined(TARGET_RISCV64)
void genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn);
#endif // TARGET_RISCV64

void genPoisonFrame(regMaskTP bbRegLiveIn);

Expand Down
227 changes: 21 additions & 206 deletions src/coreclr/jit/codegenriscv64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1635,7 +1635,7 @@ void CodeGen::genLclHeap(GenTree* tree)
// The SP might already be in the guard page, so we must touch it BEFORE
// the alloc, not after.

// ld_w r0, 0(SP)
// tickle the page - this triggers a page fault when on the guard page
emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, REG_SP, 0);

lastTouchDelta = amount;
Expand Down Expand Up @@ -1680,8 +1680,7 @@ void CodeGen::genLclHeap(GenTree* tree)
// and localloc size is a multiple of STACK_ALIGN.

// Loop:
ssize_t imm = -16;
emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, imm);
emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, -16);

emit->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, REG_SPBASE, 8);
emit->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, REG_SPBASE, 0);
Expand All @@ -1693,8 +1692,8 @@ void CodeGen::genLclHeap(GenTree* tree)

emit->emitIns_R_R_I(INS_addi, emitActualTypeSize(type), regCnt, regCnt, -16);

assert(imm == (-4 << 2)); // goto loop.
emit->emitIns_R_R_I(INS_bne, EA_PTRSIZE, regCnt, REG_R0, (-4 << 2));
// goto Loop
emit->emitIns_R_R_I(INS_bne, EA_PTRSIZE, regCnt, REG_R0, -4 << 2);

lastTouchDelta = 0;
}
Expand All @@ -1708,7 +1707,6 @@ void CodeGen::genLclHeap(GenTree* tree)
// case SP is on the last byte of the guard page. Thus you must
// touch SP-0 first not SP-0x1000.
//
// This is similar to the prolog code in CodeGen::genAllocLclFrame().
//
// Note that we go through a few hoops so that SP never points to
// illegal pages at any time during the tickling process.
Expand All @@ -1719,23 +1717,20 @@ void CodeGen::genLclHeap(GenTree* tree)
// addi regCnt, REG_R0, 0
//
// Skip:
// lui regTmp, eeGetPageSize()>>12
// lui regPageSize, eeGetPageSize()>>12
// addi regTmp, SP, 0
// Loop:
// lw r0, 0(SP) // tickle the page - read from the page
// sub RA, SP, regTmp // decrement SP by eeGetPageSize()
// bltu RA, regCnt, Done
// sub SP, SP,regTmp
// j Loop
// lw r0, 0(regTmp) // tickle the page - read from the page
// sub regTmp, regTmp, regPageSize
// bgeu regTmp, regCnt, Loop
//
// Done:
// mov SP, regCnt
// addi SP, regCnt, 0
//

if (tempReg == REG_NA)
tempReg = internalRegisters.Extract(tree);

regNumber rPageSize = internalRegisters.GetSingle(tree);

assert(regCnt != tempReg);
emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, tempReg, REG_SPBASE, regCnt);

Expand All @@ -1746,35 +1741,24 @@ void CodeGen::genLclHeap(GenTree* tree)
emit->emitIns_R_R_I(INS_beq, EA_PTRSIZE, tempReg, REG_R0, 2 << 2);
emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, regCnt, REG_R0, 0);

emit->emitIns_R_I(INS_lui, EA_PTRSIZE, rPageSize, pageSize >> 12);

// genDefineTempLabel(loop);

// tickle the page - Read from the updated SP - this triggers a page fault when on the guard page
emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, REG_SPBASE, 0);

// decrement SP by eeGetPageSize()
emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, tempReg, REG_SPBASE, rPageSize);
regNumber rPageSize = internalRegisters.GetSingle(tree);

assert(rPageSize != tempReg);
noway_assert(rPageSize != tempReg);

ssize_t imm = 3 << 2; // goto done.
emit->emitIns_R_R_I(INS_bltu, EA_PTRSIZE, tempReg, regCnt, imm);

emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, rPageSize);

imm = -4 << 2;
// Jump to loop and tickle new stack address
emit->emitIns_I(INS_j, EA_PTRSIZE, imm);
emit->emitIns_R_I(INS_lui, EA_PTRSIZE, rPageSize, pageSize >> 12);
regSet.verifyRegUsed(rPageSize);
emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, tempReg, REG_SPBASE, 0);

// Done with stack tickle loop
// genDefineTempLabel(done);
// tickle the page - this triggers a page fault when on the guard page
emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, tempReg, 0);
emit->emitIns_R_R_R(INS_sub, EA_4BYTE, tempReg, tempReg, rPageSize);

// Now just move the final value to SP
emit->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_SPBASE, regCnt, 0);
emit->emitIns_R_R_I(INS_bgeu, EA_PTRSIZE, tempReg, regCnt, -2 << 2);

// lastTouchDelta is dynamic, and can be up to a page. So if we have outgoing arg space,
// we're going to assume the worst and probe.
// Move the final value to SP
emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, regCnt, 0);
}

ALLOC_DONE:
Expand Down Expand Up @@ -6701,175 +6685,6 @@ void CodeGen::genEstablishFramePointer(int delta, bool reportUnwindData)
};
}

//------------------------------------------------------------------------
// genStackProbe: Emit a loop that reads one word from each page of a frame
//                that is about to be allocated, without moving SP.
//
// Arguments:
//    frameSize - size in bytes of the frame to probe; must be representable as an 'int'
//    rOffset   - scratch register that holds the address currently being probed
//    rLimit    - scratch register that receives SP - frameSize, the lowest address of interest
//    rPageSize - scratch register that receives the page size, used as the loop stride
//
// Notes:
//    All three registers are overwritten by the emitted code. The first probe is
//    issued at SP - pageSize; the loop then steps down one page at a time for as
//    long as rOffset >= rLimit (signed comparison).
//
void CodeGen::genStackProbe(ssize_t frameSize, regNumber rOffset, regNumber rLimit, regNumber rPageSize)
{
    // The frame size has to survive a round trip through a 32-bit int.
    noway_assert((ssize_t)frameSize == (ssize_t)(int)frameSize);

    // The RISC-V Privileged ISA fixes the page size at 4KiB.
    const target_size_t probeStride = compiler->eeGetPageSize();
    noway_assert(probeStride == 0x1000);

    emitter* const codeEmitter = GetEmitter();

    // rLimit = SP - frameSize
    codeEmitter->emitLoadImmediate(EA_PTRSIZE, rLimit, -frameSize);
    regSet.verifyRegUsed(rLimit);
    codeEmitter->emitIns_R_R_R(INS_add, EA_PTRSIZE, rLimit, rLimit, REG_SPBASE);

    // rPageSize = page size (a multiple of 4KiB, so a single 'lui' is enough)
    codeEmitter->emitIns_R_I(INS_lui, EA_PTRSIZE, rPageSize, probeStride >> 12);
    regSet.verifyRegUsed(rPageSize);

    // rOffset = SP - page size: address of the first probe
    codeEmitter->emitIns_R_R_R(INS_sub, EA_PTRSIZE, rOffset, REG_SPBASE, rPageSize);

    // Loop:
    //   lw   zero, 0(rOffset)            ; touch the page - faults if this is the guard page
    //   sub  rOffset, rOffset, rPageSize ; step down to the next page
    //   bge  rOffset, rLimit, Loop       ; keep going until we drop below the limit
    codeEmitter->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, rOffset, 0);
    codeEmitter->emitIns_R_R_R(INS_sub, EA_PTRSIZE, rOffset, rOffset, rPageSize);

    // Every instruction is 4 bytes wide, so the loop head sits two instructions back.
    const int backToLoopHead = -2 << 2;
    codeEmitter->emitIns_R_R_I(INS_bge, EA_PTRSIZE, rOffset, rLimit, backToLoopHead);
}

//------------------------------------------------------------------------
// genAllocLclFrame: Probe the stack.
//
// Notes:
//      This only does the probing; allocating the frame is done when callee-saved registers are saved.
//      This is done before anything has been pushed. The previous frame might have a large outgoing argument
//      space that has been allocated, but the lowest addresses have not been touched. Our frame setup might
//      not touch up to the first 504 bytes. This means we could miss a guard page. On Windows, however,
//      there are always three guard pages, so we will not miss them all. On Linux, there is only one guard
//      page by default, so we need to be more careful. We do an extra probe if we might not have probed
//      recently enough. That is, if a call and prolog establishment might lead to missing a page. We do this
//      on Windows as well just to be consistent, even though it should not be necessary.
//
//      Three strategies are used depending on the frame size:
//        - less than one page:      no probe is emitted;
//        - one to three pages:      one unrolled 'lui/sub/lw' probe per page;
//        - three pages or more:     a probing loop emitted by genStackProbe().
//      SP itself is never modified here.
//
// Arguments:
//      frameSize         - the size of the stack frame being allocated.
//      initReg           - register to use as a scratch register.
//      pInitRegZeroed    - OUT parameter. *pInitRegZeroed is set to 'false' if and only if
//                          this call sets 'initReg' to a non-zero value. Otherwise, it is unchanged.
//      maskArgRegsLiveIn - incoming argument registers that are currently live.
//
// Return value:
//      None
//
void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn)
{
    assert(compiler->compGeneratingProlog);

    if (frameSize == 0)
    {
        return;
    }

    // According to RISC-V Privileged ISA page size should be equal 4KiB
    const target_size_t pageSize = compiler->eeGetPageSize();

    assert(!compiler->info.compPublishStubParam || (REG_SECRET_STUB_PARAM != initReg));

    // Distance in bytes between the lowest address probed so far and the bottom of the new frame.
    target_size_t lastTouchDelta = 0;

    emitter* emit = GetEmitter();

    // Emit the following sequence to 'tickle' the pages.
    // Note it is important that stack pointer not change until this is complete since the tickles
    // could cause a stack overflow, and we need to be able to crawl the stack afterward
    // (which means the stack pointer needs to be known).

    if (frameSize < pageSize)
    {
        // no probe needed
        lastTouchDelta = frameSize;
    }
    else if (frameSize < 3 * pageSize)
    {
        // between 1 and 3 pages we will probe each page without a loop,
        // because it is faster that way and doesn't cost us much
        lastTouchDelta = frameSize;

        for (target_size_t probeOffset = pageSize; probeOffset <= frameSize; probeOffset += pageSize)
        {
            // probeOffset is a multiple of the page size, so 'lui' alone materializes it.
            emit->emitIns_R_I(INS_lui, EA_PTRSIZE, initReg, probeOffset >> 12);
            regSet.verifyRegUsed(initReg);

            // initReg = SP - probeOffset, then touch that address.
            emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, initReg, REG_SPBASE, initReg);
            emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, initReg, 0);

            lastTouchDelta -= pageSize;
        }

        assert(pInitRegZeroed != nullptr);
        *pInitRegZeroed = false; // The initReg does not contain zero

        assert(lastTouchDelta == frameSize % pageSize);
        compiler->unwindPadding();
    }
    else
    {
        // probe each page, that we need to allocate large stack frame
        assert(frameSize >= 3 * pageSize);

        // Pick two more scratch registers for the probing loop: anything that is either
        // caller-saved or already marked as modified, minus live incoming args and initReg.
        regMaskTP availMask = RBM_ALLINT & (regSet.rsGetModifiedRegsMask() | ~RBM_INT_CALLEE_SAVED);
        availMask &= ~maskArgRegsLiveIn;   // Remove all of the incoming argument registers
                                           // as they are currently live
        availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg

        noway_assert(availMask != RBM_NONE);

        regMaskTP regMask = genFindLowestBit(availMask);
        regNumber rLimit  = genRegNumFromMask(regMask);

        availMask &= ~regMask; // Remove rLimit register

        noway_assert(availMask != RBM_NONE);

        regMask             = genFindLowestBit(availMask);
        regNumber rPageSize = genRegNumFromMask(regMask);

        genStackProbe((ssize_t)frameSize, initReg, rLimit, rPageSize);

        assert(pInitRegZeroed != nullptr);
        *pInitRegZeroed = false; // The initReg does not contain zero

        lastTouchDelta = frameSize % pageSize;
        compiler->unwindPadding();
    }

#if STACK_PROBE_BOUNDARY_THRESHOLD_BYTES != 0
    // if the last page was too far, we will make an extra probe at the bottom
    if (lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > pageSize)
    {
        assert(lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES < pageSize << 1);

        // frameSize generally does not fit the 12-bit signed immediate of 'addi'
        // (this path is only taken when the untouched tail is close to a full page),
        // so materialize it with the general immediate loader instead.
        emit->emitLoadImmediate(EA_PTRSIZE, initReg, (ssize_t)frameSize);
        regSet.verifyRegUsed(initReg);

        // initReg = SP - frameSize, then touch the very bottom of the new frame.
        emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, initReg, REG_SPBASE, initReg);
        emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, initReg, 0);

        assert(pInitRegZeroed != nullptr);
        *pInitRegZeroed = false; // The initReg does not contain zero

        compiler->unwindPadding();
    }
#endif
}

void CodeGen::genJumpToThrowHlpBlk_la(
SpecialCodeKind codeKind, instruction ins, regNumber reg1, BasicBlock* failBlk, regNumber reg2)
{
Expand Down
25 changes: 0 additions & 25 deletions src/coreclr/jit/emitriscv64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3351,31 +3351,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
if (vt == TYP_REF || vt == TYP_BYREF)
emitGCvarDeadUpd(adr + ofs, dst2 DEBUG_ARG(varNum));
}
// if (emitInsWritesToLclVarStackLocPair(id))
//{
// unsigned ofs2 = ofs + TARGET_POINTER_SIZE;
// if (id->idGCrefReg2() != GCT_NONE)
// {
// emitGCvarLiveUpd(adr + ofs2, varNum, id->idGCrefReg2(), *dp);
// }
// else
// {
// // If the type of the local is a gc ref type, update the liveness.
// var_types vt;
// if (varNum >= 0)
// {
// // "Regular" (non-spill-temp) local.
// vt = var_types(emitComp->lvaTable[varNum].lvType);
// }
// else
// {
// TempDsc* tmpDsc = codeGen->regSet.tmpFindNum(varNum);
// vt = tmpDsc->tdTempType();
// }
// if (vt == TYP_REF || vt == TYP_BYREF)
// emitGCvarDeadUpd(adr + ofs2, *dp);
// }
//}
}

#ifdef DEBUG
Expand Down

0 comments on commit 7d6e19a

Please sign in to comment.