From affc0fc7a79812b92445eb474c87533e32b35127 Mon Sep 17 00:00:00 2001 From: Jeremy Koritzinsky Date: Tue, 23 Aug 2022 16:46:46 -0700 Subject: [PATCH] Enable inlining P/Invokes into try blocks with no catch or filter clauses V2 (#73661) --- src/coreclr/inc/CrstTypes.def | 2 +- src/coreclr/inc/corinfo.h | 5 ++ src/coreclr/inc/crsttypes.h | 2 +- src/coreclr/jit/importer.cpp | 50 ++++++++++--------- src/coreclr/jit/lower.cpp | 21 +++++--- src/coreclr/vm/exceptionhandling.cpp | 37 ++++++++++---- src/coreclr/vm/i386/excepx86.cpp | 21 ++++++-- .../exceptioninterop/ExceptionInterop.cs | 34 +++++++++++++ 8 files changed, 125 insertions(+), 47 deletions(-) diff --git a/src/coreclr/inc/CrstTypes.def b/src/coreclr/inc/CrstTypes.def index 5ab977844a4deb..2c1beebd3ce545 100644 --- a/src/coreclr/inc/CrstTypes.def +++ b/src/coreclr/inc/CrstTypes.def @@ -202,7 +202,7 @@ Crst Exception End Crst ExecutableAllocatorLock - AcquiredAfter LoaderHeap ArgBasedStubCache UMEntryThunkFreeListLock + AcquiredAfter LoaderHeap ArgBasedStubCache UMEntryThunkFreeListLock COMCallWrapper End Crst ExecuteManRangeLock diff --git a/src/coreclr/inc/corinfo.h b/src/coreclr/inc/corinfo.h index 91ae75acc4f006..9a91fc92ae8a96 100644 --- a/src/coreclr/inc/corinfo.h +++ b/src/coreclr/inc/corinfo.h @@ -3253,4 +3253,9 @@ class ICorDynamicInfo : public ICorStaticInfo // #define IMAGE_REL_BASED_REL_THUMB_MOV32_PCREL 0x14 +/**********************************************************************************/ +#ifdef TARGET_64BIT +#define USE_PER_FRAME_PINVOKE_INIT +#endif + #endif // _COR_INFO_H_ diff --git a/src/coreclr/inc/crsttypes.h b/src/coreclr/inc/crsttypes.h index c41a84b50073f6..f96df91d88e821 100644 --- a/src/coreclr/inc/crsttypes.h +++ b/src/coreclr/inc/crsttypes.h @@ -155,7 +155,7 @@ int g_rgCrstLevelMap[] = -1, // CrstClrNotification 6, // CrstCodeFragmentHeap 9, // CrstCodeVersioning - 0, // CrstCOMCallWrapper + 3, // CrstCOMCallWrapper 5, // CrstCOMWrapperCache 3, // CrstDataTest1 0, // CrstDataTest2 
diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp index 6fbafbcc993313..bdf66cc22ee459 100644 --- a/src/coreclr/jit/importer.cpp +++ b/src/coreclr/jit/importer.cpp @@ -8428,41 +8428,43 @@ bool Compiler::impCanPInvokeInlineCallSite(BasicBlock* block) return true; } -#ifdef TARGET_64BIT - // On 64-bit platforms, we disable pinvoke inlining inside of try regions. - // Note that this could be needed on other architectures too, but we - // haven't done enough investigation to know for sure at this point. - // - // Here is the comment from JIT64 explaining why: - // [VSWhidbey: 611015] - because the jitted code links in the - // Frame (instead of the stub) we rely on the Frame not being - // 'active' until inside the stub. This normally happens by the - // stub setting the return address pointer in the Frame object - // inside the stub. On a normal return, the return address - // pointer is zeroed out so the Frame can be safely re-used, but - // if an exception occurs, nobody zeros out the return address - // pointer. Thus if we re-used the Frame object, it would go - // 'active' as soon as we link it into the Frame chain. - // - // Technically we only need to disable PInvoke inlining if we're - // in a handler or if we're in a try body with a catch or - // filter/except where other non-handler code in this method - // might run and try to re-use the dirty Frame object. - // - // A desktop test case where this seems to matter is - // jit\jit64\ebvts\mcpp\sources2\ijw\__clrcall\vector_ctor_dtor.02\deldtor_clr.exe +#ifdef USE_PER_FRAME_PINVOKE_INIT + // For platforms that use per-P/Invoke InlinedCallFrame initialization, + // we can't inline P/Invokes inside of try blocks where we can resume execution in the same function. + // The runtime can correctly unwind out of an InlinedCallFrame and out of managed code. 
However, + // it cannot correctly unwind out of an InlinedCallFrame and stop at that frame without also unwinding + // at least one managed frame. In particular, the runtime struggles to restore non-volatile registers + // from the top-most unmanaged call before the InlinedCallFrame. As a result, the runtime does not support + // re-entering the same method frame as the InlinedCallFrame after an exception in unmanaged code. if (block->hasTryIndex()) { // This does not apply to the raw pinvoke call that is inside the pinvoke // ILStub. In this case, we have to inline the raw pinvoke call into the stub, // otherwise we would end up with a stub that recursively calls itself, and end // up with a stack overflow. + // This works correctly because the runtime never emits a catch block in a managed-to-native + // IL stub. If the runtime ever emits a catch block into a managed-to-native stub when using + // P/Invoke helpers, this condition will need to be revisited. if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB) && opts.ShouldUsePInvokeHelpers()) { return true; } - return false; + // Check if this block's try block or any containing try blocks have catch handlers. + // If any of the containing try blocks have catch handlers, + // we cannot inline a P/Invoke for reasons above. If the handler is a fault or finally handler, + // we can inline a P/Invoke into this block in the try since the code will not resume execution + // in the same method after throwing an exception if only fault or finally handlers are executed. 
+ for (unsigned int ehIndex = block->getTryIndex(); ehIndex != EHblkDsc::NO_ENCLOSING_INDEX; + ehIndex = ehGetEnclosingTryIndex(ehIndex)) + { + if (ehGetDsc(ehIndex)->HasCatchHandler()) + { + return false; + } + } + + return true; } #endif // TARGET_64BIT diff --git a/src/coreclr/jit/lower.cpp b/src/coreclr/jit/lower.cpp index 2871a506679acd..e7feb1f819a14e 100644 --- a/src/coreclr/jit/lower.cpp +++ b/src/coreclr/jit/lower.cpp @@ -4297,6 +4297,7 @@ GenTree* Lowering::CreateFrameLinkUpdate(FrameLinkAction action) // Return Value: // none // +// See the usages for USE_PER_FRAME_PINVOKE_INIT for more information. void Lowering::InsertPInvokeMethodProlog() { noway_assert(comp->info.compUnmanagedCallCountWithGCTransition); @@ -4393,13 +4394,16 @@ void Lowering::InsertPInvokeMethodProlog() // -------------------------------------------------------- // On 32-bit targets, CORINFO_HELP_INIT_PINVOKE_FRAME initializes the PInvoke frame and then pushes it onto // the current thread's Frame stack. On 64-bit targets, it only initializes the PInvoke frame. + // As a result, the frame only needs to be pushed onto the frame stack here on 64-bit targets. CLANG_FORMAT_COMMENT_ANCHOR; #ifdef TARGET_64BIT +#ifdef USE_PER_FRAME_PINVOKE_INIT + // For IL stubs, we push the frame once even when we're doing per-pinvoke init. if (comp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB)) +#endif // USE_PER_FRAME_PINVOKE_INIT { - // Push a frame - if we are NOT in an IL stub, this is done right before the call - // The init routine sets InlinedCallFrame's m_pNext, so we just set the thead's top-of-stack + // Push a frame.
The init routine sets InlinedCallFrame's m_pNext, so we just set the thread's top-of-stack GenTree* frameUpd = CreateFrameLinkUpdate(PushFrame); firstBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, frameUpd)); ContainCheckStoreIndir(frameUpd->AsStoreInd()); @@ -4459,9 +4463,10 @@ void Lowering::InsertPInvokeMethodEpilog(BasicBlock* returnBB DEBUGARG(GenTree* // this in the epilog for IL stubs; for non-IL stubs the frame is popped after every PInvoke call. CLANG_FORMAT_COMMENT_ANCHOR; -#ifdef TARGET_64BIT +#ifdef USE_PER_FRAME_PINVOKE_INIT + // For IL stubs, we push the frame once even when we're doing per-pinvoke init if (comp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB)) -#endif // TARGET_64BIT +#endif // USE_PER_FRAME_PINVOKE_INIT { GenTree* frameUpd = CreateFrameLinkUpdate(PopFrame); returnBlockRange.InsertBefore(insertionPoint, LIR::SeqTree(comp, frameUpd)); @@ -4617,7 +4622,7 @@ void Lowering::InsertPInvokeCallProlog(GenTreeCall* call) // contains PInvokes; on 64-bit targets this is necessary in non-stubs. CLANG_FORMAT_COMMENT_ANCHOR; -#ifdef TARGET_64BIT +#ifdef USE_PER_FRAME_PINVOKE_INIT if (!comp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB)) { // Set the TCB's frame to be the one we just created. @@ -4629,7 +4634,7 @@ void Lowering::InsertPInvokeCallProlog(GenTreeCall* call) BlockRange().InsertBefore(insertBefore, LIR::SeqTree(comp, frameUpd)); ContainCheckStoreIndir(frameUpd->AsStoreInd()); } -#endif // TARGET_64BIT +#endif // USE_PER_FRAME_PINVOKE_INIT // IMPORTANT **** This instruction must be the last real instruction **** // It changes the thread's state to Preemptive mode @@ -4695,7 +4700,7 @@ void Lowering::InsertPInvokeCallEpilog(GenTreeCall* call) // this happens after every PInvoke call in non-stubs. 32-bit targets instead mark the frame as inactive. 
CLANG_FORMAT_COMMENT_ANCHOR; -#ifdef TARGET_64BIT +#ifdef USE_PER_FRAME_PINVOKE_INIT if (!comp->opts.jitFlags->IsSet(JitFlags::JIT_FLAG_IL_STUB)) { tree = CreateFrameLinkUpdate(PopFrame); @@ -4719,7 +4724,7 @@ void Lowering::InsertPInvokeCallEpilog(GenTreeCall* call) BlockRange().InsertBefore(insertionPoint, constantZero, storeCallSiteTracker); ContainCheckStoreLoc(storeCallSiteTracker); -#endif // TARGET_64BIT +#endif // USE_PER_FRAME_PINVOKE_INIT } //------------------------------------------------------------------------ diff --git a/src/coreclr/vm/exceptionhandling.cpp b/src/coreclr/vm/exceptionhandling.cpp index 9623dee9dfdcfa..07907510799d63 100644 --- a/src/coreclr/vm/exceptionhandling.cpp +++ b/src/coreclr/vm/exceptionhandling.cpp @@ -16,6 +16,7 @@ #include "virtualcallstub.h" #include "utilcode.h" #include "interoplibinterface.h" +#include "corinfo.h" #if defined(TARGET_X86) #define USE_CURRENT_CONTEXT_IN_FILTER @@ -1773,8 +1774,10 @@ CLRUnwindStatus ExceptionTracker::ProcessOSExceptionNotification( // InlinedCallFrames (ICF) are allocated, initialized and linked to the Frame chain // by the code generated by the JIT for a method containing a PInvoke. // - // JIT generates code that links in the ICF at the start of the method and unlinks it towards - // the method end. Thus, ICF is present on the Frame chain at any given point so long as the + // On platforms where USE_PER_FRAME_PINVOKE_INIT is not defined, + // the JIT generates code that links in the ICF + // at the start of the method and unlinks it towards the method end. + // Thus, ICF is present on the Frame chain at any given point so long as the // method containing the PInvoke is on the stack. // // Now, if the method containing ICF catches an exception, we will reset the Frame chain @@ -1812,13 +1815,16 @@ CLRUnwindStatus ExceptionTracker::ProcessOSExceptionNotification( // below the callerSP for which we will invoke ExceptionUnwind. 
// // Thus, ICF::ExceptionUnwind should not do anything significant. If any of these assumptions - // break, then the next best thing will be to make the JIT link/unlink the frame dynamically. + // break, then the next best thing will be to make the JIT link/unlink the frame dynamically. // - // If the current method executing is from precompiled ReadyToRun code, then the above is no longer - // applicable because each PInvoke is wrapped by calls to the JIT_PInvokeBegin and JIT_PInvokeEnd - // helpers, which push and pop the ICF to the current thread. Unlike jitted code, the ICF is not - // linked during the method prolog, and unlinked at the epilog (it looks more like the X64 case). + // If the current method executing is from precompiled ReadyToRun code, each PInvoke is wrapped + // by calls to the JIT_PInvokeBegin and JIT_PInvokeEnd helpers, + // which push and pop the ICF to the current thread. The ICF is neither + // linked during the method prolog nor unlinked at the epilog. // In that case, we need to unlink the ICF during unwinding here. + // On platforms where USE_PER_FRAME_PINVOKE_INIT is defined, the JIT generates code that links in + // the ICF immediately before, and unlinks it immediately after, a PInvoke in non-IL-stubs, like ReadyToRun. + // See the usages for USE_PER_FRAME_PINVOKE_INIT for more information. if (fTargetUnwind && (pFrame->GetVTablePtr() == InlinedCallFrame::GetMethodFrameVPtr())) { @@ -1837,9 +1843,20 @@ CLRUnwindStatus ExceptionTracker::ProcessOSExceptionNotification( // to the JIT_PInvokeBegin and JIT_PInvokeEnd helpers, which push and pop the ICF on the thread. The // ICF is not linked at the method prolog and unlined at the epilog when running R2R code. Since the // JIT_PInvokeEnd helper will be skipped, we need to unlink the ICF here.
If the executing method - // has another pinovoke, it will re-link the ICF again when the JIT_PInvokeBegin helper is called - - if (ExecutionManager::IsReadyToRunCode(((InlinedCallFrame*)pFrame)->m_pCallerReturnAddress)) + // has another pinvoke, it will re-link the ICF again when the JIT_PInvokeBegin helper is called. + + TADDR returnAddress = ((InlinedCallFrame*)pFrame)->m_pCallerReturnAddress; +#ifdef USE_PER_FRAME_PINVOKE_INIT + // If we're setting up the frame for each P/Invoke for the given platform, + // then we do this for all P/Invokes except ones in IL stubs. + // IL stubs link the frame in for the whole stub, so if an exception is thrown during marshalling, + // the ICF will be on the frame chain and inactive. + if (returnAddress != NULL && !ExecutionManager::GetCodeMethodDesc(returnAddress)->IsILStub()) +#else + // If we aren't setting up the frame for each P/Invoke (instead setting up once per method), + // then ReadyToRun code is the only code using the per-P/Invoke logic. + if (ExecutionManager::IsReadyToRunCode(returnAddress)) +#endif { pICFForUnwindTarget = pICFForUnwindTarget->Next(); } diff --git a/src/coreclr/vm/i386/excepx86.cpp b/src/coreclr/vm/i386/excepx86.cpp index b03435b569670e..d4abd2171ee246 100644 --- a/src/coreclr/vm/i386/excepx86.cpp +++ b/src/coreclr/vm/i386/excepx86.cpp @@ -28,6 +28,7 @@ #include "eeconfig.h" #include "vars.hpp" #include "generics.h" +#include "corinfo.h" #include "asmconstants.h" #include "virtualcallstub.h" @@ -2965,6 +2966,8 @@ void ResumeAtJitEH(CrawlFrame* pCf, // InlinedCallFrame somewhere up the call chain that is not related to the current exception // handling. + // See the usages for USE_PER_FRAME_PINVOKE_INIT for more information. 
+ #ifdef DEBUG TADDR handlerFrameSP = pCf->GetRegisterSet()->SP; #endif // DEBUG @@ -2976,10 +2979,22 @@ void ResumeAtJitEH(CrawlFrame* pCf, NULL /* StackwalkCacheUnwindInfo* */); _ASSERTE(unwindSuccess); - if (((TADDR)pThread->m_pFrame < pCf->GetRegisterSet()->SP) && ExecutionManager::IsReadyToRunCode(((InlinedCallFrame*)pThread->m_pFrame)->m_pCallerReturnAddress)) + if (((TADDR)pThread->m_pFrame < pCf->GetRegisterSet()->SP)) { - _ASSERTE((TADDR)pThread->m_pFrame >= handlerFrameSP); - pThread->m_pFrame->Pop(pThread); + TADDR returnAddress = ((InlinedCallFrame*)pThread->m_pFrame)->m_pCallerReturnAddress; +#ifdef USE_PER_FRAME_PINVOKE_INIT + // If we're setting up the frame for each P/Invoke for the given platform, + // then we do this for all P/Invokes except ones in IL stubs. + if (returnAddress != NULL && !ExecutionManager::GetCodeMethodDesc(returnAddress)->IsILStub()) +#else + // If we aren't setting up the frame for each P/Invoke (instead setting up once per method), + // then ReadyToRun code is the only code using the per-P/Invoke logic. 
+ if (ExecutionManager::IsReadyToRunCode(returnAddress)) +#endif + { + _ASSERTE((TADDR)pThread->m_pFrame >= handlerFrameSP); + pThread->m_pFrame->Pop(pThread); + } } } diff --git a/src/tests/baseservices/exceptions/exceptioninterop/ExceptionInterop.cs b/src/tests/baseservices/exceptions/exceptioninterop/ExceptionInterop.cs index 1c6f43e12e4a45..f0f479dfc55b11 100644 --- a/src/tests/baseservices/exceptions/exceptioninterop/ExceptionInterop.cs +++ b/src/tests/baseservices/exceptions/exceptioninterop/ExceptionInterop.cs @@ -122,4 +122,38 @@ public static void ThrowNativeExceptionAndCatchInFrameWithFinally() Assert.True(caughtException); } + + [Fact] + [PlatformSpecific(TestPlatforms.Windows)] + [SkipOnMono("Exception interop not supported on Mono.")] + public static void ThrowNativeExceptionInFrameWithFinallyCatchInOuterFrame() + { + bool caughtException = false; + try + { + ThrowInFrameWithFinally(); + } + catch + { + caughtException = true; + } + + Assert.True(caughtException); + + [MethodImpl(MethodImplOptions.NoInlining)] + static void ThrowInFrameWithFinally() + { + try + { + ThrowException(); + } + finally + { + // Try calling another P/Invoke in the finally block before the catch + // to make sure we have everything set up + // to recover from the exceptional control flow. + NativeFunction(); + } + } + } }