From 7d6e19a5b0df8dd5a5fcb2ea9187f4221b0fdd92 Mon Sep 17 00:00:00 2001 From: Tymoteusz Wenerski Date: Wed, 19 Feb 2025 16:45:24 +0100 Subject: [PATCH] simplify stack probing on rv64 --- src/coreclr/jit/codegen.h | 6 +- src/coreclr/jit/codegenriscv64.cpp | 227 +++-------------------------- src/coreclr/jit/emitriscv64.cpp | 25 ---- 3 files changed, 23 insertions(+), 235 deletions(-) diff --git a/src/coreclr/jit/codegen.h b/src/coreclr/jit/codegen.h index b26c93534b2f9d..99f5c3bec2d09b 100644 --- a/src/coreclr/jit/codegen.h +++ b/src/coreclr/jit/codegen.h @@ -425,11 +425,9 @@ class CodeGen final : public CodeGenInterface void genOSRSaveRemainingCalleeSavedRegisters(); #endif // TARGET_AMD64 -#if defined(TARGET_RISCV64) - void genStackProbe(ssize_t frameSize, regNumber rOffset, regNumber rLimit, regNumber rPageSize); -#endif - +#if !defined(TARGET_RISCV64) void genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn); +#endif // TARGET_RISCV64 void genPoisonFrame(regMaskTP bbRegLiveIn); diff --git a/src/coreclr/jit/codegenriscv64.cpp b/src/coreclr/jit/codegenriscv64.cpp index 54cd716b1ccebd..beedce38c58705 100644 --- a/src/coreclr/jit/codegenriscv64.cpp +++ b/src/coreclr/jit/codegenriscv64.cpp @@ -1635,7 +1635,7 @@ void CodeGen::genLclHeap(GenTree* tree) // The SP might already be in the guard page, so we must touch it BEFORE // the alloc, not after. - // ld_w r0, 0(SP) + // tickle the page - this triggers a page fault when on the guard page emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, REG_SP, 0); lastTouchDelta = amount; @@ -1680,8 +1680,7 @@ void CodeGen::genLclHeap(GenTree* tree) // and localloc size is a multiple of STACK_ALIGN. // Loop: - ssize_t imm = -16; - emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, imm); + emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, -16); emit->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, REG_SPBASE, 8); emit->emitIns_R_R_I(INS_sd, EA_PTRSIZE, REG_R0, REG_SPBASE, 0); @@ -1693,8 +1692,8 @@ void CodeGen::genLclHeap(GenTree* tree) emit->emitIns_R_R_I(INS_addi, emitActualTypeSize(type), regCnt, regCnt, -16); - assert(imm == (-4 << 2)); // goto loop. - emit->emitIns_R_R_I(INS_bne, EA_PTRSIZE, regCnt, REG_R0, (-4 << 2)); + // goto Loop + emit->emitIns_R_R_I(INS_bne, EA_PTRSIZE, regCnt, REG_R0, -4 << 2); lastTouchDelta = 0; } @@ -1708,7 +1707,6 @@ void CodeGen::genLclHeap(GenTree* tree) // case SP is on the last byte of the guard page. Thus you must // touch SP-0 first not SP-0x1000. // - // This is similar to the prolog code in CodeGen::genAllocLclFrame(). // // Note that we go through a few hoops so that SP never points to // illegal pages at any time during the tickling process. @@ -1719,23 +1717,20 @@ void CodeGen::genLclHeap(GenTree* tree) // addi regCnt, REG_R0, 0 // // Skip: - // lui regTmp, eeGetPageSize()>>12 + // lui regPageSize, eeGetPageSize()>>12 + // addi regTmp, SP, 0 // Loop: - // lw r0, 0(SP) // tickle the page - read from the page - // sub RA, SP, regTmp // decrement SP by eeGetPageSize() - // bltu RA, regCnt, Done - // sub SP, SP,regTmp - // j Loop + // lw r0, 0(regTmp) // tickle the page - read from the page + // sub regTmp, regTmp, regPageSize + // bgeu regTmp, regCnt, Loop // // Done: - // mov SP, regCnt + // addi SP, regCnt, 0 // if (tempReg == REG_NA) tempReg = internalRegisters.Extract(tree); - regNumber rPageSize = internalRegisters.GetSingle(tree); - assert(regCnt != tempReg); emit->emitIns_R_R_R(INS_sltu, EA_PTRSIZE, tempReg, REG_SPBASE, regCnt); @@ -1746,35 +1741,24 @@ void CodeGen::genLclHeap(GenTree* tree) emit->emitIns_R_R_I(INS_beq, EA_PTRSIZE, tempReg, REG_R0, 2 << 2); emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, regCnt, REG_R0, 0); - emit->emitIns_R_I(INS_lui, EA_PTRSIZE, rPageSize, pageSize >> 12); - - // genDefineTempLabel(loop); - - // tickle the page - Read from the updated SP - this triggers a page fault when on the guard page - emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, REG_SPBASE, 0); - - // decrement SP by eeGetPageSize() - emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, tempReg, REG_SPBASE, rPageSize); + regNumber rPageSize = internalRegisters.GetSingle(tree); - assert(rPageSize != tempReg); + noway_assert(rPageSize != tempReg); - ssize_t imm = 3 << 2; // goto done. - emit->emitIns_R_R_I(INS_bltu, EA_PTRSIZE, tempReg, regCnt, imm); - - emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, REG_SPBASE, REG_SPBASE, rPageSize); - - imm = -4 << 2; - // Jump to loop and tickle new stack address - emit->emitIns_I(INS_j, EA_PTRSIZE, imm); + emit->emitIns_R_I(INS_lui, EA_PTRSIZE, rPageSize, pageSize >> 12); + regSet.verifyRegUsed(rPageSize); + emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, tempReg, REG_SPBASE, 0); - // Done with stack tickle loop - // genDefineTempLabel(done); + // tickle the page - this triggers a page fault when on the guard page + emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, tempReg, 0); + emit->emitIns_R_R_R(INS_sub, EA_4BYTE, tempReg, tempReg, rPageSize); - // Now just move the final value to SP - emit->emitIns_R_R_I(INS_ori, EA_PTRSIZE, REG_SPBASE, regCnt, 0); + emit->emitIns_R_R_I(INS_bgeu, EA_PTRSIZE, tempReg, regCnt, -2 << 2); // lastTouchDelta is dynamic, and can be up to a page. So if we have outgoing arg space, // we're going to assume the worst and probe. + // Move the final value to SP + emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, REG_SPBASE, regCnt, 0); } ALLOC_DONE: @@ -6701,175 +6685,6 @@ void CodeGen::genEstablishFramePointer(int delta, bool reportUnwindData) }; } -//------------------------------------------------------------------------ -// genStackProbe: Probe the stack without changing it -// -// Notes: -// This function is using loop to probe each memory page. -// -// Arguments: -// frameSize - total frame size -// rOffset - usually initial register number -// rLimit - an extra register for comparison -// rPageSize - register for storing page size -// -void CodeGen::genStackProbe(ssize_t frameSize, regNumber rOffset, regNumber rLimit, regNumber rPageSize) -{ - // make sure frameSize safely fits within 4 bytes - noway_assert((ssize_t)(int)frameSize == (ssize_t)frameSize); - - const target_size_t pageSize = compiler->eeGetPageSize(); - - // According to RISC-V Privileged ISA page size should be equal 4KiB - noway_assert(pageSize == 0x1000); - - emitter* emit = GetEmitter(); - - emit->emitLoadImmediate(EA_PTRSIZE, rLimit, -frameSize); - regSet.verifyRegUsed(rLimit); - - emit->emitIns_R_R_R(INS_add, EA_PTRSIZE, rLimit, rLimit, REG_SPBASE); - - emit->emitIns_R_I(INS_lui, EA_PTRSIZE, rPageSize, pageSize >> 12); - regSet.verifyRegUsed(rPageSize); - - emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, rOffset, REG_SPBASE, rPageSize); - - // Loop: - // tickle the page - Read from the updated SP - this triggers a page fault when on the guard page - emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, rOffset, 0); - emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, rOffset, rOffset, rPageSize); - - // each instr is 4 bytes - // if (rOffset >= rLimit) goto Loop; - emit->emitIns_R_R_I(INS_bge, EA_PTRSIZE, rOffset, rLimit, -2 << 2); -} - -//------------------------------------------------------------------------ -// genAllocLclFrame: Probe the stack. -// -// Notes: -// This only does the probing; allocating the frame is done when callee-saved registers are saved. -// This is done before anything has been pushed. The previous frame might have a large outgoing argument -// space that has been allocated, but the lowest addresses have not been touched. Our frame setup might -// not touch up to the first 504 bytes. This means we could miss a guard page. On Windows, however, -// there are always three guard pages, so we will not miss them all. On Linux, there is only one guard -// page by default, so we need to be more careful. We do an extra probe if we might not have probed -// recently enough. That is, if a call and prolog establishment might lead to missing a page. We do this -// on Windows as well just to be consistent, even though it should not be necessary. -// -// Arguments: -// frameSize - the size of the stack frame being allocated. -// initReg - register to use as a scratch register. -// pInitRegZeroed - OUT parameter. *pInitRegZeroed is set to 'false' if and only if -// this call sets 'initReg' to a non-zero value. Otherwise, it is unchanged. -// maskArgRegsLiveIn - incoming argument registers that are currently live. -// -// Return value: -// None -// -void CodeGen::genAllocLclFrame(unsigned frameSize, regNumber initReg, bool* pInitRegZeroed, regMaskTP maskArgRegsLiveIn) -{ - assert(compiler->compGeneratingProlog); - - if (frameSize == 0) - { - return; - } - - // According to RISC-V Privileged ISA page size should be equal 4KiB - const target_size_t pageSize = compiler->eeGetPageSize(); - - assert(!compiler->info.compPublishStubParam || (REG_SECRET_STUB_PARAM != initReg)); - - target_size_t lastTouchDelta = 0; - - emitter* emit = GetEmitter(); - - // Emit the following sequence to 'tickle' the pages. - // Note it is important that stack pointer not change until this is complete since the tickles - // could cause a stack overflow, and we need to be able to crawl the stack afterward - // (which means the stack pointer needs to be known). - - if (frameSize < pageSize) - { - // no probe needed - lastTouchDelta = frameSize; - } - else if (frameSize < 3 * pageSize) - { - // between 1 and 3 pages we will probe each page without a loop, - // because it is faster that way and doesn't cost us much - lastTouchDelta = frameSize; - - for (target_size_t probeOffset = pageSize; probeOffset <= frameSize; probeOffset += pageSize) - { - emit->emitIns_R_I(INS_lui, EA_PTRSIZE, initReg, probeOffset >> 12); - regSet.verifyRegUsed(initReg); - - emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, initReg, REG_SPBASE, initReg); - emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, initReg, 0); - - lastTouchDelta -= pageSize; - } - - assert(pInitRegZeroed != nullptr); - *pInitRegZeroed = false; // The initReg does not contain zero - - assert(lastTouchDelta == frameSize % pageSize); - compiler->unwindPadding(); - } - else - { - // probe each page, that we need to allocate large stack frame - assert(frameSize >= 3 * pageSize); - - regMaskTP availMask = RBM_ALLINT & (regSet.rsGetModifiedRegsMask() | ~RBM_INT_CALLEE_SAVED); - availMask &= ~maskArgRegsLiveIn; // Remove all of the incoming argument registers - // as they are currently live - availMask &= ~genRegMask(initReg); // Remove the pre-calculated initReg - - noway_assert(availMask != RBM_NONE); - - regMaskTP regMask = genFindLowestBit(availMask); - regNumber rLimit = genRegNumFromMask(regMask); - - availMask &= ~regMask; // Remove rLimit register - - noway_assert(availMask != RBM_NONE); - - regMask = genFindLowestBit(availMask); - regNumber rPageSize = genRegNumFromMask(regMask); - - genStackProbe((ssize_t)frameSize, initReg, rLimit, rPageSize); - - assert(pInitRegZeroed != nullptr); - *pInitRegZeroed = false; // The initReg does not contain zero - - lastTouchDelta = frameSize % pageSize; - compiler->unwindPadding(); - } - -#if STACK_PROBE_BOUNDARY_THRESHOLD_BYTES != 0 - // if the last page was too far, we will make an extra probe at the bottom - if (lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES > pageSize) - { - assert(lastTouchDelta + STACK_PROBE_BOUNDARY_THRESHOLD_BYTES < pageSize << 1); - - emit->emitIns_R_R_I(INS_addi, EA_PTRSIZE, initReg, REG_R0, frameSize); - regSet.verifyRegUsed(initReg); - - emit->emitIns_R_R_R(INS_sub, EA_PTRSIZE, initReg, REG_SPBASE, initReg); - emit->emitIns_R_R_I(INS_lw, EA_4BYTE, REG_R0, initReg, 0); - - assert(pInitRegZeroed != nullptr); - *pInitRegZeroed = false; // The initReg does not contain zero - - compiler->unwindPadding(); - } -#endif -} - void CodeGen::genJumpToThrowHlpBlk_la( SpecialCodeKind codeKind, instruction ins, regNumber reg1, BasicBlock* failBlk, regNumber reg2) { diff --git a/src/coreclr/jit/emitriscv64.cpp b/src/coreclr/jit/emitriscv64.cpp index 740e9b5fe040cc..370e5b4348deab 100644 --- a/src/coreclr/jit/emitriscv64.cpp +++ b/src/coreclr/jit/emitriscv64.cpp @@ -3351,31 +3351,6 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) if (vt == TYP_REF || vt == TYP_BYREF) emitGCvarDeadUpd(adr + ofs, dst2 DEBUG_ARG(varNum)); } - // if (emitInsWritesToLclVarStackLocPair(id)) - //{ - // unsigned ofs2 = ofs + TARGET_POINTER_SIZE; - // if (id->idGCrefReg2() != GCT_NONE) - // { - // emitGCvarLiveUpd(adr + ofs2, varNum, id->idGCrefReg2(), *dp); - // } - // else - // { - // // If the type of the local is a gc ref type, update the liveness. - // var_types vt; - // if (varNum >= 0) - // { - // // "Regular" (non-spill-temp) local. - // vt = var_types(emitComp->lvaTable[varNum].lvType); - // } - // else - // { - // TempDsc* tmpDsc = codeGen->regSet.tmpFindNum(varNum); - // vt = tmpDsc->tdTempType(); - // } - // if (vt == TYP_REF || vt == TYP_BYREF) - // emitGCvarDeadUpd(adr + ofs2, *dp); - // } - //} } #ifdef DEBUG