From f9016e3507a9669894c500c1cdd4a45aaef24354 Mon Sep 17 00:00:00 2001
From: Vladimir Sadov
Date: Tue, 16 Jul 2024 20:34:43 -0700
Subject: [PATCH] [NativeAOT] When reconciling shadow stack after catch, use
 more precise way to figure how much to pop. (#104652)

* keep SSP value in REGDISPLAY, update as we unwind.

* fix Unix build

* make SSP fields windows-specific

* #if defined(TARGET_WINDOWS) some uses of SSP
---
 .../nativeaot/Runtime/PalRedhawkCommon.h      |  4 ++
 .../nativeaot/Runtime/StackFrameIterator.cpp  | 36 ++++++++++++++
 .../nativeaot/Runtime/amd64/AsmOffsetsCpu.h   |  1 +
 .../Runtime/amd64/ExceptionHandling.asm       | 48 +++++++++++--------
 src/coreclr/nativeaot/Runtime/regdisplay.h    |  8 +++-
 .../Runtime/windows/CoffNativeCodeManager.cpp |  4 ++
 .../SmokeTests/UnitTests/Generics.cs          | 20 ++++++++
 7 files changed, 99 insertions(+), 22 deletions(-)

diff --git a/src/coreclr/nativeaot/Runtime/PalRedhawkCommon.h b/src/coreclr/nativeaot/Runtime/PalRedhawkCommon.h
index d72b9d9f8e3d73..b5e430779c6760 100644
--- a/src/coreclr/nativeaot/Runtime/PalRedhawkCommon.h
+++ b/src/coreclr/nativeaot/Runtime/PalRedhawkCommon.h
@@ -153,7 +153,11 @@ struct PAL_LIMITED_CONTEXT
     uintptr_t R13;
     uintptr_t R14;
     uintptr_t R15;
+#if defined(TARGET_WINDOWS)
+    uintptr_t SSP;
+#else
     uintptr_t __explicit_padding__;
+#endif // TARGET_WINDOWS
     Fp128 Xmm6;
     Fp128 Xmm7;
     Fp128 Xmm8;
diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp
index 83464274ccae9f..7ffaf3cab3afe4 100644
--- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp
+++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp
@@ -516,6 +516,14 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PTR_PAL_LIMITED_CO
     m_RegDisplay.pR13 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pCtx, R13);
     m_RegDisplay.pR14 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pCtx, R14);
     m_RegDisplay.pR15 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pCtx, R15);
+
+#if defined(TARGET_WINDOWS)
+    //
+    // SSP, we only need the value
+    //
+    m_RegDisplay.SSP = pCtx->SSP;
+#endif
+
     //
     // preserved xmm regs
     //
@@ -632,6 +640,13 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, NATIVE_CONTEXT* pC
     m_RegDisplay.pR14 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R14);
     m_RegDisplay.pR15 = (PTR_uintptr_t)PTR_TO_REG(pCtx, R15);
 
+#if defined(TARGET_WINDOWS)
+    //
+    // SSP, not needed. Unwind from native context is never for EH.
+    //
+    m_RegDisplay.SSP = 0;
+#endif
+
     //
     // scratch regs
     //
@@ -1004,6 +1019,13 @@ void StackFrameIterator::UnwindFuncletInvokeThunk()
     m_RegDisplay.pR14 = SP++;
     m_RegDisplay.pR15 = SP++;
 
+#if defined(TARGET_WINDOWS)
+    if (m_RegDisplay.SSP)
+    {
+        m_RegDisplay.SSP += 8;
+    }
+#endif
+
 #elif defined(TARGET_X86)
     SP = (PTR_uintptr_t)(m_RegDisplay.SP + 0x4);    // skip the saved assembly-routine-EBP
 
@@ -1368,6 +1390,12 @@ void StackFrameIterator::UnwindUniversalTransitionThunk()
     m_RegDisplay.SetIP(PCODEToPINSTR(*addressOfPushedCallerIP));
     m_RegDisplay.SetSP((uintptr_t)dac_cast<TADDR>(stackFrame->get_CallerSP()));
     SetControlPC(dac_cast<PTR_VOID>(m_RegDisplay.GetIP()));
+#if defined(TARGET_AMD64) && defined(TARGET_WINDOWS)
+    if (m_RegDisplay.SSP)
+    {
+        m_RegDisplay.SSP += 8;
+    }
+#endif
 
     // All universal transition cases rely on conservative GC reporting being applied to the
     // full argument set that flowed into the call. Report the lower bound of this range (the
@@ -1428,6 +1456,14 @@ void StackFrameIterator::UnwindThrowSiteThunk()
     m_RegDisplay.pR13 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, R13);
     m_RegDisplay.pR14 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, R14);
     m_RegDisplay.pR15 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, R15);
+
+#if defined(TARGET_WINDOWS)
+    if (m_RegDisplay.SSP)
+    {
+        m_RegDisplay.SSP += 8;
+    }
+#endif
+
 #elif defined(TARGET_ARM)
     m_RegDisplay.pR4 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, R4);
     m_RegDisplay.pR5 = (PTR_uintptr_t)PTR_TO_MEMBER_TADDR(PAL_LIMITED_CONTEXT, pContext, R5);
diff --git a/src/coreclr/nativeaot/Runtime/amd64/AsmOffsetsCpu.h b/src/coreclr/nativeaot/Runtime/amd64/AsmOffsetsCpu.h
index fe98a2eafc7c39..2a2e85e5e72baa 100644
--- a/src/coreclr/nativeaot/Runtime/amd64/AsmOffsetsCpu.h
+++ b/src/coreclr/nativeaot/Runtime/amd64/AsmOffsetsCpu.h
@@ -59,6 +59,7 @@ PLAT_ASM_OFFSET(0f0, PAL_LIMITED_CONTEXT, Xmm15)
 PLAT_ASM_SIZEOF(130, REGDISPLAY)
 PLAT_ASM_OFFSET(78, REGDISPLAY, SP)
 PLAT_ASM_OFFSET(80, REGDISPLAY, IP)
+PLAT_ASM_OFFSET(88, REGDISPLAY, SSP)
 
 PLAT_ASM_OFFSET(18, REGDISPLAY, pRbx)
 PLAT_ASM_OFFSET(20, REGDISPLAY, pRbp)
diff --git a/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.asm b/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.asm
index facd8e983e6796..e6356dbe1bfcfd 100644
--- a/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.asm
+++ b/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.asm
@@ -44,7 +44,9 @@ ALTERNATE_ENTRY RhpThrowHwExGEHCONT ; this needs to be an EHCONT target since we
 
         .pushframe
         alloc_stack     SIZEOF_XmmSaves + 8h    ;; reserve stack for the xmm saves (+8h to realign stack)
-        push_vol_reg    r8                      ;; padding
+        rdsspq          r8                      ;; nop if SSP is not implemented, 0 if not enabled
+        push_vol_reg    r8                      ;; SSP
+        xor             r8, r8
         push_nonvol_reg r15
         push_nonvol_reg r14
         push_nonvol_reg r13
@@ -127,7 +129,9 @@ NESTED_ENTRY RhpThrowEx, _TEXT
         xor     r8, r8
 
        alloc_stack      SIZEOF_XmmSaves + 8h    ;; reserve stack for the xmm saves (+8h to realign stack)
-        push_vol_reg    r8                      ;; padding
+        rdsspq          r8                      ;; nop if SSP is not implemented, 0 if not enabled
+        push_vol_reg    r8                      ;; SSP
+        xor             r8, r8
         push_nonvol_reg r15
         push_nonvol_reg r14
         push_nonvol_reg r13
@@ -221,7 +225,9 @@ NESTED_ENTRY RhpRethrow, _TEXT
         xor     r8, r8
 
         alloc_stack     SIZEOF_XmmSaves + 8h    ;; reserve stack for the xmm saves (+8h to realign stack)
-        push_vol_reg    r8                      ;; padding
+        rdsspq          r8                      ;; nop if SSP is not implemented, 0 if not enabled
+        push_vol_reg    r8                      ;; SSP
+        xor             r8, r8
         push_nonvol_reg r15
         push_nonvol_reg r14
         push_nonvol_reg r13
@@ -490,7 +496,7 @@ endif
         INLINE_THREAD_UNHIJACK rdx, rcx, r9     ;; Thread in rdx, trashes rcx and r9
 
         mov     rcx, [rsp + rsp_offsetof_arguments + 18h]   ;; rcx <- current ExInfo *
-        mov     r10, [r8 + OFFSETOF__REGDISPLAY__IP]        ;; r10 <- original IP value
+        mov     r11, [r8 + OFFSETOF__REGDISPLAY__SSP]       ;; r11 <- resume SSP value
         mov     r8, [r8 + OFFSETOF__REGDISPLAY__SP]         ;; r8 <- resume SP value
         xor     r9, r9                                      ;; r9 <- 0
 
@@ -505,7 +511,7 @@ endif
         ;; Sanity check: if we have shadow stack, it should agree with what we have in rsp
 LOCAL_STACK_USE equ 118h
 ifdef _DEBUG
-        rdsspq  r9
+        rdsspq  r9                              ;; NB, r9 == 0 prior to this
         test    r9, r9
         jz      @f
         mov     r9, [r9]
@@ -531,23 +537,23 @@ endif
 
         ;; reset RSP and jump to RAX
 @@:     mov     rsp, r8                         ;; reset the SP to resume SP value
-        ;; if have shadow stack, then we need to reconcile it with the rsp change we have just made
-        rdsspq  r9
+        ;; if have shadow stack, then we need to reconcile it with the rsp change we have just made. (r11 must contain target SSP)
+        rdsspq  r9                              ;; NB, r9 == 0 prior to this
         test    r9, r9
-        jz      NoSSP
-
-        ;; Find the shadow stack pointer for the frame we are going to restore to.
-        ;; The SSP we search is pointing to the return address of the frame represented
-        ;; by the passed in context. So we search for the instruction pointer from
-        ;; the context and return one slot up from there.
-        ;; (Same logic as in GetSSPForFrameOnCurrentStack)
-        xor     r11, r11
-    @@: inc     r11
-        cmp     [r9 + r11 * 8 - 8], r10
-        jne     @b
-
-        incsspq r11
-NoSSP:  jmp     rax
+        je      No_Ssp_Update
+        sub     r11, r9
+        shr     r11, 3
+        ;; the incsspq instruction uses only the lowest 8 bits of the argument, so we need to loop in case the increment is larger than 255
+        mov     r9, 255
+    Update_Loop:
+        cmp     r11, r9
+        cmovb   r9, r11
+        incsspq r9
+        sub     r11, r9
+        ja      Update_Loop
+
+
+No_Ssp_Update: jmp     rax
 
 NESTED_END RhpCallCatchFunclet, _TEXT
diff --git a/src/coreclr/nativeaot/Runtime/regdisplay.h b/src/coreclr/nativeaot/Runtime/regdisplay.h
index e95c9d5fd71d6f..cd6f13418b778c 100644
--- a/src/coreclr/nativeaot/Runtime/regdisplay.h
+++ b/src/coreclr/nativeaot/Runtime/regdisplay.h
@@ -30,7 +30,12 @@ struct REGDISPLAY
 #endif // TARGET_AMD64
 
     uintptr_t   SP;
-    PCODE       IP;
+    PCODE       IP;
+
+#if defined(TARGET_AMD64) && defined(TARGET_WINDOWS)
+    uintptr_t   SSP;    // keep track of SSP for EH unwind
+                        // we do not adjust the original, so only need the value
+#endif // TARGET_AMD64 && TARGET_WINDOWS
 
 #if defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)
     Fp128          Xmm[16-6]; // preserved xmm6..xmm15 regs for EH stackwalk
@@ -68,6 +73,7 @@ struct REGDISPLAY
     inline void SetEdiLocation(unsigned long *loc) { pRdi = (PTR_uintptr_t)loc; }
     inline void SetEbpLocation(unsigned long *loc) { pRbp = (PTR_uintptr_t)loc; }
 #endif
+
 };
 
 #ifdef TARGET_X86
diff --git a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp
index 0bd9fbb34f9a55..b8c2310d644004 100644
--- a/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp
+++ b/src/coreclr/nativeaot/Runtime/windows/CoffNativeCodeManager.cpp
@@ -790,6 +790,10 @@ bool CoffNativeCodeManager::UnwindStackFrame(MethodInfo * pMethodInfo,
     if (!(flags & USFF_GcUnwind))
     {
         memcpy(pRegisterSet->Xmm, &context.Xmm6, sizeof(pRegisterSet->Xmm));
+        if (pRegisterSet->SSP)
+        {
+            pRegisterSet->SSP += 8;
+        }
     }
 #elif defined(TARGET_ARM64)
     if (!(flags & USFF_GcUnwind))
diff --git a/src/tests/nativeaot/SmokeTests/UnitTests/Generics.cs b/src/tests/nativeaot/SmokeTests/UnitTests/Generics.cs
index 5df11edec24563..488171e0f27ec7 100644
--- a/src/tests/nativeaot/SmokeTests/UnitTests/Generics.cs
+++ b/src/tests/nativeaot/SmokeTests/UnitTests/Generics.cs
@@ -2356,10 +2356,30 @@ public void Blagh()
         }
     }
 
+    [MethodImpl(MethodImplOptions.NoInlining)]
+    private static void DeepAV(int x)
+    {
+        if (x > 0)
+            DeepAV(x - 1);
+
+        // Call an instance method on something we never allocated, but overrides a used virtual.
+        // This asserted the compiler when trying to build a template for Unused<__Canon>.
+        ((Unused<object>)s_ref).Blagh();
+    }
+
     public static void Run()
     {
         new Used().DoStuff();
 
+        for (int i = 0; i < 10; i++)
+            try
+            {
+                DeepAV(i);
+            }
+            catch (NullReferenceException)
+            {
+            }
+
         try
         {
             // Call an instance method on something we never allocated, but overrides a used virtual.
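
Note (not part of the patch): the motivation for the new scheme, per the subject,
is precision. The removed code found the target SSP by walking the shadow stack
upward until it saw the resume IP (the "@@: inc r11 / cmp / jne @b" loop, same
logic as GetSSPForFrameOnCurrentStack). A value scan like that can be ambiguous
when the same return address occupies several live shadow-stack slots, which is
exactly the shape deep recursion produces; the new DeepAV smoke test exercises
it. Tracking SSP in REGDISPLAY and adding 8 for every frame the unwinder pops
(one 8-byte return-address slot per call) yields an exact target, so the catch
funclet thunk only needs "target - current" to know how much to pop.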
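For illustration only, here is a minimal C++ model of the new Update_Loop in
RhpCallCatchFunclet. It is not runtime code: IncSspQ is a hypothetical stand-in
for the INCSSPQ instruction, which pops 8-byte shadow-stack slots but honors
only the low 8 bits of its operand, hence the 255-slot chunks.

    #include <cstdint>
    #include <cstdio>

    // Stand-in for INCSSPQ; prints instead of popping a real shadow stack.
    static void IncSspQ(uint8_t n)
    {
        std::printf("incsspq %d\n", n);
    }

    // currentSsp plays the role of r9 (the rdsspq result) and targetSsp the
    // role of r11 (REGDISPLAY::SSP). The shadow stack grows down, so the
    // target of an unwind sits at a higher address than the current SSP.
    static void ReconcileShadowStack(uintptr_t currentSsp, uintptr_t targetSsp)
    {
        if (currentSsp == 0)
            return;                                       // je No_Ssp_Update

        uintptr_t slots = (targetSsp - currentSsp) >> 3;  // sub r11, r9 / shr r11, 3
        while (slots != 0)
        {
            // cmp r11, r9 / cmovb r9, r11: at most 255 slots per incsspq
            uint8_t chunk = (uint8_t)(slots < 255 ? slots : 255);
            IncSspQ(chunk);                               // incsspq r9
            slots -= chunk;                               // sub r11, r9 / ja Update_Loop
        }
    }

    int main()
    {
        // 600 slots to pop: prints incsspq 255, 255, 90.
        ReconcileShadowStack(0x7000, 0x7000 + 600 * 8);
    }

Without the chunking, a single incsspq with 600 in the register would pop only
600 AND 0xff = 88 slots, leaving the shadow stack out of sync with rsp at the
resume point.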