From c8e4f2c465696477ab123a602ff12e8c0996d968 Mon Sep 17 00:00:00 2001 From: Vladimir Sadov Date: Tue, 2 Jul 2024 12:47:18 -0700 Subject: [PATCH] [NativeAOT] Adding CET support (#102680) * Add support for STATUS_RETURN_ADDRESS_HIJACK_ATTEMPT * fix build with clang * Allow hijacked returns that land in assembly thunks * fix x86 build * fail fast if hijack is hit on an unhijacked thread. * comment * assert that OS unhijacked the thread to the same target as stashed by us. * opt into CETCOMPAT by default * unify adjustment for thunks * Use CETCompat as condition. Narrow to x64. * Enable EHCONT, if CET and CFG are enabled * tweak comments * Reconcile shadow stack with SP changes in RhpCallCatchFunclet * Use RhFailFast for failfast * drop __fastcall in HijackFunc * Apply suggestions from code review Co-authored-by: Jan Kotas * remove fastcall from RhpHijackForGcStress * Apply suggestions from code review Co-authored-by: Jan Kotas --------- Co-authored-by: Jan Kotas --- .../Microsoft.NETCore.Native.Windows.targets | 6 ++ src/coreclr/nativeaot/Runtime/EHHelpers.cpp | 55 +++++++++++ src/coreclr/nativeaot/Runtime/PalRedhawk.h | 3 + .../nativeaot/Runtime/StackFrameIterator.cpp | 66 ++++++++----- .../nativeaot/Runtime/StackFrameIterator.h | 1 + .../nativeaot/Runtime/amd64/AsmOffsetsCpu.h | 1 + .../Runtime/amd64/ExceptionHandling.asm | 45 ++++++++- .../nativeaot/Runtime/i386/GcProbe.asm | 12 +-- .../nativeaot/Runtime/inc/CommonTypes.h | 4 + src/coreclr/nativeaot/Runtime/thread.cpp | 10 +- src/coreclr/nativeaot/Runtime/thread.h | 8 +- .../nativeaot/Runtime/unix/PalRedhawkUnix.cpp | 10 ++ .../Runtime/windows/PalRedhawkMinWin.cpp | 99 ++++++++++++++----- src/coreclr/vm/excep.cpp | 5 +- 14 files changed, 254 insertions(+), 71 deletions(-) diff --git a/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.Windows.targets b/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.Windows.targets index 18c3c1f017a9ff..8035484bf04a53 100644 --- a/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.Windows.targets +++ b/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.Windows.targets @@ -99,6 +99,12 @@ The .NET Foundation licenses this file to you under the MIT license. + + + + + + diff --git a/src/coreclr/nativeaot/Runtime/EHHelpers.cpp b/src/coreclr/nativeaot/Runtime/EHHelpers.cpp index 325128c4e01fcd..dc0e6fe1b75bef 100644 --- a/src/coreclr/nativeaot/Runtime/EHHelpers.cpp +++ b/src/coreclr/nativeaot/Runtime/EHHelpers.cpp @@ -536,6 +536,61 @@ int32_t __stdcall RhpVectoredExceptionHandler(PEXCEPTION_POINTERS pExPtrs) return EXCEPTION_CONTINUE_SEARCH; } + // the following would work on ARM64 as well, but there is no way to test right now. +#ifdef TARGET_AMD64 + +#ifndef STATUS_RETURN_ADDRESS_HIJACK_ATTEMPT +#define STATUS_RETURN_ADDRESS_HIJACK_ATTEMPT ((uintptr_t)0x80000033L) +#endif + + if (faultCode == STATUS_RETURN_ADDRESS_HIJACK_ATTEMPT) + { + Thread * pThread = ThreadStore::GetCurrentThreadIfAvailable(); + if (pThread == NULL || !pThread->IsCurrentThreadInCooperativeMode()) + { + // if we are not in coop mode, this cannot be our hijack + // Perhaps some other runtime is responsible. + return EXCEPTION_CONTINUE_SEARCH; + } + + // Sanity check. + if (!pThread->IsHijacked()) + { + _ASSERTE(!"The thread should be hijacked by us."); + RhFailFast(); + } + + PCONTEXT interruptedContext = pExPtrs->ContextRecord; + bool areShadowStacksEnabled = PalAreShadowStacksEnabled(); + if (areShadowStacksEnabled) + { + // OS should have fixed the SP value to the same as we`ve stashed for the hijacked thread + _ASSERTE(*(size_t *)interruptedContext->GetSp() == (uintptr_t)pThread->GetHijackedReturnAddress()); + + // When the CET is enabled, the interruption happens on the ret instruction in the calee. + // We need to "pop" rsp to the caller, as if the ret has consumed it. + interruptedContext->SetSp(interruptedContext->GetSp() + 8); + } + + // Change the IP to be at the original return site, as if we have returned to the caller. + // That IP is an interruptible safe point, so we can suspend right there. + uintptr_t origIp = interruptedContext->GetIp(); + interruptedContext->SetIp((uintptr_t)pThread->GetHijackedReturnAddress()); + + pThread->InlineSuspend(interruptedContext); + + if (areShadowStacksEnabled) + { + // Undo the "pop", so that the ret could now succeed. + interruptedContext->SetSp(interruptedContext->GetSp() - 8); + interruptedContext->SetIp(origIp); + } + + ASSERT(!pThread->IsHijacked()); + return EXCEPTION_CONTINUE_EXECUTION; + } +#endif // TARGET_AMD64 (support for STATUS_RETURN_ADDRESS_HIJACK_ATTEMPT) + uintptr_t faultingIP = pExPtrs->ContextRecord->GetIp(); ICodeManager * pCodeManager = GetRuntimeInstance()->GetCodeManagerForAddress((PTR_VOID)faultingIP); diff --git a/src/coreclr/nativeaot/Runtime/PalRedhawk.h b/src/coreclr/nativeaot/Runtime/PalRedhawk.h index b047e54d6a1c65..b0a1606a115374 100644 --- a/src/coreclr/nativeaot/Runtime/PalRedhawk.h +++ b/src/coreclr/nativeaot/Runtime/PalRedhawk.h @@ -436,6 +436,7 @@ typedef struct DECLSPEC_ALIGN(16) _CONTEXT { uintptr_t GetIp() { return Pc; } uintptr_t GetLr() { return Lr; } uintptr_t GetSp() { return Sp; } + void SetSp(uintptr_t sp) { Sp = sp; } template void ForEachPossibleObjectRef(F lambda) @@ -665,6 +666,7 @@ REDHAWK_PALIMPORT UInt32_BOOL REDHAWK_PALAPI PalVirtualProtect(_In_ void* pAddre REDHAWK_PALIMPORT void PalFlushInstructionCache(_In_ void* pAddress, size_t size); REDHAWK_PALIMPORT void REDHAWK_PALAPI PalSleep(uint32_t milliseconds); REDHAWK_PALIMPORT UInt32_BOOL REDHAWK_PALAPI PalSwitchToThread(); +REDHAWK_PALIMPORT UInt32_BOOL REDHAWK_PALAPI PalAreShadowStacksEnabled(); REDHAWK_PALIMPORT HANDLE REDHAWK_PALAPI PalCreateEventW(_In_opt_ LPSECURITY_ATTRIBUTES pEventAttributes, UInt32_BOOL manualReset, UInt32_BOOL initialState, _In_opt_z_ LPCWSTR pName); REDHAWK_PALIMPORT uint64_t REDHAWK_PALAPI PalGetTickCount64(); REDHAWK_PALIMPORT HANDLE REDHAWK_PALAPI PalGetModuleHandleFromPointer(_In_ void* pointer); @@ -692,6 +694,7 @@ REDHAWK_PALIMPORT bool REDHAWK_PALAPI PalStartEventPipeHelperThread(_In_ Backgro typedef void (*PalHijackCallback)(_In_ NATIVE_CONTEXT* pThreadContext, _In_opt_ void* pThreadToHijack); REDHAWK_PALIMPORT void REDHAWK_PALAPI PalHijack(HANDLE hThread, _In_opt_ void* pThreadToHijack); REDHAWK_PALIMPORT UInt32_BOOL REDHAWK_PALAPI PalRegisterHijackCallback(_In_ PalHijackCallback callback); +REDHAWK_PALIMPORT HijackFunc* REDHAWK_PALAPI PalGetHijackTarget(_In_ HijackFunc* defaultHijackTarget); #endif REDHAWK_PALIMPORT UInt32_BOOL REDHAWK_PALAPI PalAllocateThunksFromTemplate(_In_ HANDLE hTemplateModule, uint32_t templateRva, size_t templateSize, _Outptr_result_bytebuffer_(templateSize) void** newThunksOut); diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp index ae073e57c7ecd0..2574ea39a16633 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.cpp @@ -277,28 +277,8 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PInvokeTransitionF #endif // defined(USE_PORTABLE_HELPERS) - // This function guarantees that the final initialized context will refer to a managed - // frame. In the rare case where the PC does not refer to managed code (and refers to an - // assembly thunk instead), unwind through the thunk sequence to find the nearest managed - // frame. - // NOTE: When thunks are present, the thunk sequence may report a conservative GC reporting - // lower bound that must be applied when processing the managed frame. - - ReturnAddressCategory category = CategorizeUnadjustedReturnAddress(m_ControlPC); - - if (category == InManagedCode) - { - ASSERT(m_pInstance->IsManaged(m_ControlPC)); - } - else if (IsNonEHThunk(category)) - { - UnwindNonEHThunkSequence(); - ASSERT(m_pInstance->IsManaged(m_ControlPC)); - } - else - { - FAILFAST_OR_DAC_FAIL_UNCONDITIONALLY("PInvokeTransitionFrame PC points to an unexpected assembly thunk kind."); - } + // adjust for thunks, if needed + EnsureInitializedToManagedFrame(); STRESS_LOG1(LF_STACKWALK, LL_INFO10000, " %p\n", m_ControlPC); } @@ -484,7 +464,13 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, PTR_PAL_LIMITED_CO } // Prepare to start a stack walk from the context listed in the supplied NATIVE_CONTEXT. -// The supplied context can describe a location in managed code. +// NOTE: When a return address hijack is executed, the PC in the NATIVE_CONTEXT +// matches the hijacked return address. This PC is not guaranteed to be in managed code +// since the hijacked return address may refer to a location where an assembly thunk called +// into managed code. +// NOTE: When the PC is in an assembly thunk, this function will unwind to the next managed +// frame and may publish a conservative stack range (if and only if any of the unwound +// thunks report a conservative range). void StackFrameIterator::InternalInit(Thread * pThreadToWalk, NATIVE_CONTEXT* pCtx, uint32_t dwFlags) { ASSERT((dwFlags & MethodStateCalculated) == 0); @@ -498,8 +484,9 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, NATIVE_CONTEXT* pC // properly walk it in parallel. ResetNextExInfoForSP(pCtx->GetSp()); - // This codepath is used by the hijack stackwalk. The IP must be in managed code. - ASSERT(m_pInstance->IsManaged(dac_cast(pCtx->GetIp()))); + // This codepath is used by the hijack stackwalk. The IP must be in managed code + // or in a conservatively reported assembly thunk. + ASSERT(IsValidReturnAddress((void*)pCtx->GetIp())); // // control state @@ -616,6 +603,35 @@ void StackFrameIterator::InternalInit(Thread * pThreadToWalk, NATIVE_CONTEXT* pC #endif // TARGET_ARM #undef PTR_TO_REG + + // adjust for thunks, if needed + EnsureInitializedToManagedFrame(); +} + +void StackFrameIterator::EnsureInitializedToManagedFrame() +{ + // This function guarantees that the final initialized context will refer to a managed + // frame. In the rare case where the PC does not refer to managed code (and refers to an + // assembly thunk instead), unwind through the thunk sequence to find the nearest managed + // frame. + // NOTE: When thunks are present, the thunk sequence may report a conservative GC reporting + // lower bound that must be applied when processing the managed frame. + + ReturnAddressCategory category = CategorizeUnadjustedReturnAddress(m_ControlPC); + + if (category == InManagedCode) + { + ASSERT(m_pInstance->IsManaged(m_ControlPC)); + } + else if (IsNonEHThunk(category)) + { + UnwindNonEHThunkSequence(); + ASSERT(m_pInstance->IsManaged(m_ControlPC)); + } + else + { + FAILFAST_OR_DAC_FAIL_UNCONDITIONALLY("Unadjusted initial PC points to an unexpected assembly thunk kind."); + } } PTR_VOID StackFrameIterator::HandleExCollide(PTR_ExInfo pExInfo) diff --git a/src/coreclr/nativeaot/Runtime/StackFrameIterator.h b/src/coreclr/nativeaot/Runtime/StackFrameIterator.h index cf7f524de8dbbe..77cef2133b5c16 100644 --- a/src/coreclr/nativeaot/Runtime/StackFrameIterator.h +++ b/src/coreclr/nativeaot/Runtime/StackFrameIterator.h @@ -86,6 +86,7 @@ class StackFrameIterator void InternalInit(Thread * pThreadToWalk, PTR_PInvokeTransitionFrame pFrame, uint32_t dwFlags); // GC stackwalk void InternalInit(Thread * pThreadToWalk, PTR_PAL_LIMITED_CONTEXT pCtx, uint32_t dwFlags); // EH and hijack stackwalk, and collided unwind void InternalInit(Thread * pThreadToWalk, NATIVE_CONTEXT* pCtx, uint32_t dwFlags); // GC stackwalk of redirected thread + void EnsureInitializedToManagedFrame(); void InternalInitForEH(Thread * pThreadToWalk, PAL_LIMITED_CONTEXT * pCtx, bool instructionFault); // EH stackwalk void InternalInitForStackTrace(); // Environment.StackTrace diff --git a/src/coreclr/nativeaot/Runtime/amd64/AsmOffsetsCpu.h b/src/coreclr/nativeaot/Runtime/amd64/AsmOffsetsCpu.h index 8dd52b3acfa85f..fe98a2eafc7c39 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/AsmOffsetsCpu.h +++ b/src/coreclr/nativeaot/Runtime/amd64/AsmOffsetsCpu.h @@ -58,6 +58,7 @@ PLAT_ASM_OFFSET(0f0, PAL_LIMITED_CONTEXT, Xmm15) PLAT_ASM_SIZEOF(130, REGDISPLAY) PLAT_ASM_OFFSET(78, REGDISPLAY, SP) +PLAT_ASM_OFFSET(80, REGDISPLAY, IP) PLAT_ASM_OFFSET(18, REGDISPLAY, pRbx) PLAT_ASM_OFFSET(20, REGDISPLAY, pRbp) diff --git a/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.asm b/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.asm index 86c3f408665b69..facd8e983e6796 100644 --- a/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.asm +++ b/src/coreclr/nativeaot/Runtime/amd64/ExceptionHandling.asm @@ -15,6 +15,10 @@ include asmmacros.inc ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; NESTED_ENTRY RhpThrowHwEx, _TEXT +ALTERNATE_ENTRY RhpThrowHwExGEHCONT ; this needs to be an EHCONT target since we'll be context-jumping here. + +.GEHCONT RhpThrowHwExGEHCONT + SIZEOF_XmmSaves equ SIZEOF__PAL_LIMITED_CONTEXT - OFFSETOF__PAL_LIMITED_CONTEXT__Xmm6 STACKSIZEOF_ExInfo equ ((SIZEOF__ExInfo + 15) AND (NOT 15)) @@ -486,8 +490,9 @@ endif INLINE_THREAD_UNHIJACK rdx, rcx, r9 ;; Thread in rdx, trashes rcx and r9 mov rcx, [rsp + rsp_offsetof_arguments + 18h] ;; rcx <- current ExInfo * + mov r10, [r8 + OFFSETOF__REGDISPLAY__IP] ;; r10 <- original IP value mov r8, [r8 + OFFSETOF__REGDISPLAY__SP] ;; r8 <- resume SP value - xor r9d, r9d ;; r9 <- 0 + xor r9, r9 ;; r9 <- 0 @@: mov rcx, [rcx + OFFSETOF__ExInfo__m_pPrevExInfo] ;; rcx <- next ExInfo cmp rcx, r9 @@ -497,6 +502,20 @@ endif @@: mov [rdx + OFFSETOF__Thread__m_pExInfoStackHead], rcx ;; store the new head on the Thread + ;; Sanity check: if we have shadow stack, it should agree with what we have in rsp + LOCAL_STACK_USE equ 118h + ifdef _DEBUG + rdsspq r9 + test r9, r9 + jz @f + mov r9, [r9] + cmp [rsp + LOCAL_STACK_USE], r9 + je @f + int 3 + @@: + xor r9, r9 ;; r9 <- 0 + endif + test [RhpTrapThreads], TrapThreadsFlags_AbortInProgress jz @f @@ -507,12 +526,28 @@ endif ;; It was the ThreadAbortException, so rethrow it mov rcx, STATUS_REDHAWK_THREAD_ABORT mov rdx, rax ;; rdx <- continuation address as exception RIP - mov rsp, r8 ;; reset the SP to resume SP value - jmp RhpThrowHwEx ;; Throw the ThreadAbortException as a special kind of hardware exception + mov rax, RhpThrowHwEx ;; Throw the ThreadAbortException as a special kind of hardware exception - ;; reset RSP and jump to the continuation address + ;; reset RSP and jump to RAX @@: mov rsp, r8 ;; reset the SP to resume SP value - jmp rax + + ;; if have shadow stack, then we need to reconcile it with the rsp change we have just made + rdsspq r9 + test r9, r9 + jz NoSSP + + ;; Find the shadow stack pointer for the frame we are going to restore to. + ;; The SSP we search is pointing to the return address of the frame represented + ;; by the passed in context. So we search for the instruction pointer from + ;; the context and return one slot up from there. + ;; (Same logic as in GetSSPForFrameOnCurrentStack) + xor r11, r11 + @@: inc r11 + cmp [r9 + r11 * 8 - 8], r10 + jne @b + + incsspq r11 +NoSSP: jmp rax NESTED_END RhpCallCatchFunclet, _TEXT diff --git a/src/coreclr/nativeaot/Runtime/i386/GcProbe.asm b/src/coreclr/nativeaot/Runtime/i386/GcProbe.asm index 7e2715d3dd7685..fe09d2a73022a7 100644 --- a/src/coreclr/nativeaot/Runtime/i386/GcProbe.asm +++ b/src/coreclr/nativeaot/Runtime/i386/GcProbe.asm @@ -251,7 +251,7 @@ RhpGcStressProbe endp endif ;; FEATURE_GC_STRESS -FASTCALL_FUNC RhpGcProbeHijack, 0 +_RhpGcProbeHijack@0 proc public HijackFixupProlog test [RhpTrapThreads], TrapThreadsFlags_TrapThreads jnz WaitForGC @@ -261,18 +261,18 @@ WaitForGC: or ecx, DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_RAX jmp RhpWaitForGC -FASTCALL_ENDFUNC +_RhpGcProbeHijack@0 endp ifdef FEATURE_GC_STRESS -FASTCALL_FUNC RhpGcStressHijack, 0 +_RhpGcStressHijack@0 proc public HijackFixupProlog or ecx, DEFAULT_PROBE_SAVE_FLAGS + PTFF_SAVE_RAX jmp RhpGcStressProbe -FASTCALL_ENDFUNC +_RhpGcStressHijack@0 endp -FASTCALL_FUNC RhpHijackForGcStress, 0 +_RhpHijackForGcStress@0 proc public push ebp mov ebp, esp @@ -307,7 +307,7 @@ FASTCALL_FUNC RhpHijackForGcStress, 0 pop edx pop ebp ret -FASTCALL_ENDFUNC +_RhpHijackForGcStress@0 endp endif ;; FEATURE_GC_STRESS end diff --git a/src/coreclr/nativeaot/Runtime/inc/CommonTypes.h b/src/coreclr/nativeaot/Runtime/inc/CommonTypes.h index 71cd8fcd0bf2b9..1b6e92b1788f27 100644 --- a/src/coreclr/nativeaot/Runtime/inc/CommonTypes.h +++ b/src/coreclr/nativeaot/Runtime/inc/CommonTypes.h @@ -61,4 +61,8 @@ typedef struct _GUID { } GUID; #endif // FEATURE_EVENT_TRACE && !_INC_WINDOWS +// Hijack funcs are not called, they are "returned to". And when done, they return to the actual caller. +// Thus they cannot have any parameters or return anything. +typedef void HijackFunc(); + #endif // __COMMON_TYPES_H__ diff --git a/src/coreclr/nativeaot/Runtime/thread.cpp b/src/coreclr/nativeaot/Runtime/thread.cpp index c3a72d1846d959..b796b052182260 100644 --- a/src/coreclr/nativeaot/Runtime/thread.cpp +++ b/src/coreclr/nativeaot/Runtime/thread.cpp @@ -572,13 +572,13 @@ void Thread::GcScanRootsWorker(ScanFunc * pfnEnumCallback, ScanContext * pvCallb #ifdef FEATURE_HIJACK -EXTERN_C void FASTCALL RhpGcProbeHijack(); -EXTERN_C void FASTCALL RhpGcStressHijack(); +EXTERN_C void RhpGcProbeHijack(); +EXTERN_C void RhpGcStressHijack(); // static bool Thread::IsHijackTarget(void* address) { - if (&RhpGcProbeHijack == address) + if (PalGetHijackTarget(/*defaultHijackTarget*/&RhpGcProbeHijack) == address) return true; #ifdef FEATURE_GC_STRESS if (&RhpGcStressHijack == address) @@ -697,7 +697,9 @@ void Thread::HijackCallback(NATIVE_CONTEXT* pThreadContext, void* pThreadToHijac #endif //FEATURE_SUSPEND_REDIRECTION } - pThread->HijackReturnAddress(pThreadContext, &RhpGcProbeHijack); + pThread->HijackReturnAddress( + pThreadContext, + PalGetHijackTarget(/*defaultHijackTarget*/&RhpGcProbeHijack)); } #ifdef FEATURE_GC_STRESS diff --git a/src/coreclr/nativeaot/Runtime/thread.h b/src/coreclr/nativeaot/Runtime/thread.h index f5a1c82e59697d..4c0a21e9f9ab7f 100644 --- a/src/coreclr/nativeaot/Runtime/thread.h +++ b/src/coreclr/nativeaot/Runtime/thread.h @@ -173,16 +173,9 @@ class Thread : private RuntimeThreadLocals #ifdef FEATURE_HIJACK static void HijackCallback(NATIVE_CONTEXT* pThreadContext, void* pThreadToHijack); - // - // Hijack funcs are not called, they are "returned to". And when done, they return to the actual caller. - // Thus they cannot have any parameters or return anything. - // - typedef void FASTCALL HijackFunc(); - void HijackReturnAddress(PAL_LIMITED_CONTEXT* pSuspendCtx, HijackFunc* pfnHijackFunction); void HijackReturnAddress(NATIVE_CONTEXT* pSuspendCtx, HijackFunc* pfnHijackFunction); void HijackReturnAddressWorker(StackFrameIterator* frameIterator, HijackFunc* pfnHijackFunction); - bool InlineSuspend(NATIVE_CONTEXT* interruptedContext); void CrossThreadUnhijack(); void UnhijackWorker(); #else // FEATURE_HIJACK @@ -209,6 +202,7 @@ class Thread : private RuntimeThreadLocals static uint64_t s_DeadThreadsNonAllocBytes; public: + bool InlineSuspend(NATIVE_CONTEXT* interruptedContext); static uint64_t GetDeadThreadsNonAllocBytes(); diff --git a/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp b/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp index d9ec970a59649c..d30c3cb3aa6af3 100644 --- a/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp @@ -636,6 +636,11 @@ REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI __stdcall PalSwitchToThread() return false; } +REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalAreShadowStacksEnabled() +{ + return false; +} + extern "C" UInt32_BOOL CloseHandle(HANDLE handle) { if ((handle == NULL) || (handle == INVALID_HANDLE_VALUE)) @@ -1070,6 +1075,11 @@ REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalRegisterHijackCallback(_In_ PalH return AddSignalHandler(INJECT_ACTIVATION_SIGNAL, ActivationHandler, &g_previousActivationHandler); } +REDHAWK_PALIMPORT HijackFunc* REDHAWK_PALAPI PalGetHijackTarget(HijackFunc* defaultHijackTarget) +{ + return defaultHijackTarget; +} + REDHAWK_PALEXPORT void REDHAWK_PALAPI PalHijack(HANDLE hThread, _In_opt_ void* pThreadToHijack) { ThreadUnixHandle* threadHandle = (ThreadUnixHandle*)hThread; diff --git a/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp b/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp index 8baee9a84be06d..2b52cfd78032f5 100644 --- a/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp +++ b/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp @@ -61,6 +61,11 @@ static HMODULE LoadKernel32dll() return LoadLibraryExW(L"kernel32", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32); } +static HMODULE LoadNtdlldll() +{ + return LoadLibraryExW(L"ntdll.dll", NULL, LOAD_LIBRARY_SEARCH_SYSTEM32); +} + void InitializeCurrentProcessCpuCount() { DWORD count; @@ -332,6 +337,20 @@ REDHAWK_PALEXPORT HANDLE REDHAWK_PALAPI PalCreateEventW(_In_opt_ LPSECURITY_ATTR return CreateEventW(pEventAttributes, manualReset, initialState, pName); } +REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalAreShadowStacksEnabled() +{ +#if defined(TARGET_AMD64) + // The SSP is null when CET shadow stacks are not enabled. On processors that don't support shadow stacks, this is a + // no-op and the intrinsic returns 0. CET shadow stacks are enabled or disabled for all threads, so the result is the + // same from any thread. + return _rdsspq() != 0; +#else + // When implementing AreShadowStacksEnabled() on other architectures, review all the places where this is used. + return false; +#endif +} + + #ifdef TARGET_X86 #define EXCEPTION_HIJACK 0xe0434f4e // 0xe0000000 | 'COM'+1 @@ -492,6 +511,10 @@ REDHAWK_PALEXPORT CONTEXT* PalAllocateCompleteOSContext(_Out_ uint8_t** contextB context = context | CONTEXT_XSTATE; } + // the context does not need XSTATE_MASK_CET_U because we should not be using + // redirection when CET is enabled and should not be here. + _ASSERTE(!PalAreShadowStacksEnabled()); + // Retrieve contextSize by passing NULL for Buffer DWORD contextSize = 0; ULONG64 xStateCompactionMask = XSTATE_MASK_LEGACY | XSTATE_MASK_AVX | XSTATE_MASK_MPX | XSTATE_MASK_AVX512; @@ -585,8 +608,6 @@ REDHAWK_PALIMPORT void REDHAWK_PALAPI PopulateControlSegmentRegisters(CONTEXT* p static PalHijackCallback g_pHijackCallback; -#ifdef FEATURE_SPECIAL_USER_MODE_APC - // These declarations are for a new special user-mode APC feature introduced in Windows. These are not yet available in Windows // SDK headers, so some names below are prefixed with "CLONE_" to avoid conflicts in the future. Once the prefixed declarations // become available in the Windows SDK headers, the prefixed declarations below can be removed in favor of the SDK ones. @@ -616,6 +637,8 @@ static const CLONE_QUEUE_USER_APC_FLAGS SpecialUserModeApcWithContextFlags = (CL (CLONE_QUEUE_USER_APC_FLAGS_SPECIAL_USER_APC | CLONE_QUEUE_USER_APC_CALLBACK_DATA_CONTEXT); +static void* g_returnAddressHijackTarget = NULL; + static void NTAPI ActivationHandler(ULONG_PTR parameter) { CLONE_APC_CALLBACK_DATA* data = (CLONE_APC_CALLBACK_DATA*)parameter; @@ -624,7 +647,6 @@ static void NTAPI ActivationHandler(ULONG_PTR parameter) Thread* pThread = (Thread*)data->Parameter; pThread->SetActivationPending(false); } -#endif REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalRegisterHijackCallback(_In_ PalHijackCallback callback) { @@ -634,6 +656,55 @@ REDHAWK_PALEXPORT UInt32_BOOL REDHAWK_PALAPI PalRegisterHijackCallback(_In_ PalH return true; } +void InitHijackingAPIs() +{ + HMODULE hKernel32 = LoadKernel32dll(); + +#ifdef HOST_AMD64 + typedef BOOL (WINAPI *IsWow64Process2Proc)(HANDLE hProcess, USHORT *pProcessMachine, USHORT *pNativeMachine); + + IsWow64Process2Proc pfnIsWow64Process2Proc = (IsWow64Process2Proc)GetProcAddress(hKernel32, "IsWow64Process2"); + USHORT processMachine, hostMachine; + if (pfnIsWow64Process2Proc != nullptr && + (*pfnIsWow64Process2Proc)(GetCurrentProcess(), &processMachine, &hostMachine) && + (hostMachine == IMAGE_FILE_MACHINE_ARM64) && + !IsWindowsVersionOrGreater(10, 0, 26100)) + { + // Special user-mode APCs are broken on WOW64 processes (x64 running on Arm64 machine) with Windows older than 11.0.26100 (24H2) + g_pfnQueueUserAPC2Proc = NULL; + } + else +#endif // HOST_AMD64 + { + g_pfnQueueUserAPC2Proc = (QueueUserAPC2Proc)GetProcAddress(hKernel32, "QueueUserAPC2"); + } + + if (PalAreShadowStacksEnabled()) + { + // When shadow stacks are enabled, support for special user-mode APCs is required + _ASSERTE(g_pfnQueueUserAPC2Proc != NULL); + + HMODULE hModNtdll = LoadNtdlldll(); + typedef void* (*PFN_RtlGetReturnAddressHijackTarget)(void); + + void* rtlGetReturnAddressHijackTarget = GetProcAddress(hModNtdll, "RtlGetReturnAddressHijackTarget"); + if (rtlGetReturnAddressHijackTarget != NULL) + { + g_returnAddressHijackTarget = ((PFN_RtlGetReturnAddressHijackTarget)rtlGetReturnAddressHijackTarget)(); + } + + if (g_returnAddressHijackTarget == NULL) + { + _ASSERTE(!"RtlGetReturnAddressHijackTarget must provide a target when shadow stacks are enabled"); + } + } +} + +REDHAWK_PALIMPORT HijackFunc* REDHAWK_PALAPI PalGetHijackTarget(HijackFunc* defaultHijackTarget) +{ + return g_returnAddressHijackTarget ? (HijackFunc*)g_returnAddressHijackTarget : defaultHijackTarget; +} + REDHAWK_PALEXPORT void REDHAWK_PALAPI PalHijack(HANDLE hThread, _In_opt_ void* pThreadToHijack) { _ASSERTE(hThread != INVALID_HANDLE_VALUE); @@ -642,28 +713,10 @@ REDHAWK_PALEXPORT void REDHAWK_PALAPI PalHijack(HANDLE hThread, _In_opt_ void* p // initialize g_pfnQueueUserAPC2Proc on demand. // Note that only one thread at a time may perform suspension (guaranteed by the thread store lock) - // so simple conditional assignment is ok. + // so simple condition check is ok. if (g_pfnQueueUserAPC2Proc == QUEUE_USER_APC2_UNINITIALIZED) { - HMODULE hKernel32 = LoadKernel32dll(); -#ifdef HOST_AMD64 - typedef BOOL (WINAPI *IsWow64Process2Proc)(HANDLE hProcess, USHORT *pProcessMachine, USHORT *pNativeMachine); - - IsWow64Process2Proc pfnIsWow64Process2Proc = (IsWow64Process2Proc)GetProcAddress(hKernel32, "IsWow64Process2"); - USHORT processMachine, hostMachine; - if (pfnIsWow64Process2Proc != nullptr && - (*pfnIsWow64Process2Proc)(GetCurrentProcess(), &processMachine, &hostMachine) && - (hostMachine == IMAGE_FILE_MACHINE_ARM64) && - !IsWindowsVersionOrGreater(10, 0, 26100)) - { - // Special user-mode APCs are broken on WOW64 processes (x64 running on Arm64 machine) with Windows older than 11.0.26100 (24H2) - g_pfnQueueUserAPC2Proc = NULL; - } - else -#endif // HOST_AMD64 - { - g_pfnQueueUserAPC2Proc = (QueueUserAPC2Proc)GetProcAddress(hKernel32, "QueueUserAPC2"); - } + InitHijackingAPIs(); } if (g_pfnQueueUserAPC2Proc) diff --git a/src/coreclr/vm/excep.cpp b/src/coreclr/vm/excep.cpp index 1183d1e825b581..942a8df13cdd67 100644 --- a/src/coreclr/vm/excep.cpp +++ b/src/coreclr/vm/excep.cpp @@ -6765,11 +6765,14 @@ VEH_ACTION WINAPI CLRVectoredExceptionHandler(PEXCEPTION_POINTERS pExceptionInfo { if (pThread == NULL || !pThread->PreemptiveGCDisabled()) { - // We are not running managed code, so this cannot be our hijack + // if we are not in coop mode, this cannot be our hijack // Perhaps some other runtime is responsible. return VEH_CONTINUE_SEARCH; } + // Sanity check. The thread should be hijacked by us. + _ASSERTE_ALL_BUILDS(pThread->HasThreadState(Thread::TS_Hijacked)); + PCONTEXT interruptedContext = pExceptionInfo->ContextRecord; bool areShadowStacksEnabled = Thread::AreShadowStacksEnabled(); if (areShadowStacksEnabled)