diff --git a/src/mscorlib/src/Internal/Runtime/Augments/RuntimeThread.cs b/src/mscorlib/src/Internal/Runtime/Augments/RuntimeThread.cs index 4c67ea3fd625..6365d0f7fd84 100644 --- a/src/mscorlib/src/Internal/Runtime/Augments/RuntimeThread.cs +++ b/src/mscorlib/src/Internal/Runtime/Augments/RuntimeThread.cs @@ -207,8 +207,8 @@ internal static int OptimalMaxSpinWaitsPerSpinIteration } // This is done lazily because the first call to the function below in the process triggers a measurement that - // takes a nontrivial amount of time. See Thread::InitializeYieldProcessorNormalized(), which describes and - // calculates this value. + // takes a nontrivial amount of time if the measurement has not already been done in the background. + // See InitializeYieldProcessorNormalized(), which describes and calculates this value. s_optimalMaxSpinWaitsPerSpinIteration = GetOptimalMaxSpinWaitsPerSpinIterationInternal(); Debug.Assert(s_optimalMaxSpinWaitsPerSpinIteration > 0); return s_optimalMaxSpinWaitsPerSpinIteration; diff --git a/src/vm/comsynchronizable.cpp b/src/vm/comsynchronizable.cpp index 8fce346142c4..d80ea3c0995b 100644 --- a/src/vm/comsynchronizable.cpp +++ b/src/vm/comsynchronizable.cpp @@ -1632,8 +1632,9 @@ INT32 QCALLTYPE ThreadNative::GetOptimalMaxSpinWaitsPerSpinIteration() BEGIN_QCALL; - Thread::EnsureYieldProcessorNormalizedInitialized(); - optimalMaxNormalizedYieldsPerSpinIteration = Thread::GetOptimalMaxNormalizedYieldsPerSpinIteration(); + // RuntimeThread calls this function only once lazily and caches the result, so ensure initialization + EnsureYieldProcessorNormalizedInitialized(); + optimalMaxNormalizedYieldsPerSpinIteration = g_optimalMaxNormalizedYieldsPerSpinIteration; END_QCALL; @@ -1655,10 +1656,11 @@ FCIMPL1(void, ThreadNative::SpinWait, int iterations) // spinning for less than that number of cycles, then switching to preemptive // mode won't help a GC start any faster. 
// - if (iterations <= 100000 && Thread::IsYieldProcessorNormalizedInitialized()) + if (iterations <= 100000) { + YieldProcessorNormalizationInfo normalizationInfo = YieldProcessorNormalizationInfo::GetNormalizationInfo(); for (int i = 0; i < iterations; i++) - Thread::YieldProcessorNormalized(); + YieldProcessorNormalized(normalizationInfo); return; } @@ -1668,9 +1670,9 @@ FCIMPL1(void, ThreadNative::SpinWait, int iterations) HELPER_METHOD_FRAME_BEGIN_NOPOLL(); GCX_PREEMP(); - Thread::EnsureYieldProcessorNormalizedInitialized(); + YieldProcessorNormalizationInfo normalizationInfo = YieldProcessorNormalizationInfo::GetNormalizationInfo(); for (int i = 0; i < iterations; i++) - Thread::YieldProcessorNormalized(); + YieldProcessorNormalized(normalizationInfo); HELPER_METHOD_FRAME_END(); } diff --git a/src/vm/finalizerthread.cpp b/src/vm/finalizerthread.cpp index 3ba346840716..2955decbd747 100644 --- a/src/vm/finalizerthread.cpp +++ b/src/vm/finalizerthread.cpp @@ -744,6 +744,8 @@ DWORD WINAPI FinalizerThread::FinalizerThreadStart(void *args) #endif GetFinalizerThread()->SetBackground(TRUE); + EnsureYieldProcessorNormalizedInitialized(); + #ifdef FEATURE_PROFAPI_ATTACH_DETACH // Add the Profiler Attach Event to the array of event handles that the // finalizer thread waits on. If the process is not enabled for profiler diff --git a/src/vm/threads.cpp b/src/vm/threads.cpp index 91373930e9c7..f7daf69f6ca9 100644 --- a/src/vm/threads.cpp +++ b/src/vm/threads.cpp @@ -63,6 +63,8 @@ SPTR_IMPL(ThreadStore, ThreadStore, s_pThreadStore); CONTEXT *ThreadStore::s_pOSContext = NULL; CLREvent *ThreadStore::s_pWaitForStackCrawlEvent; +static CrstStatic s_initializeYieldProcessorNormalizedCrst; + #ifndef DACCESS_COMPILE @@ -1363,7 +1365,7 @@ void InitThreadManager() } CONTRACTL_END; - Thread::s_initializeYieldProcessorNormalizedCrst.Init(CrstLeafLock); + s_initializeYieldProcessorNormalizedCrst.Init(CrstLeafLock); // All patched helpers should fit into one page. 
// If you hit this assert on retail build, there is most likely problem with BBT script. @@ -11747,25 +11749,29 @@ ULONGLONG Thread::QueryThreadProcessorUsage() } #endif // FEATURE_APPDOMAIN_RESOURCE_MONITORING -CrstStatic Thread::s_initializeYieldProcessorNormalizedCrst; -int Thread::s_yieldsPerNormalizedYield = 0; -int Thread::s_optimalMaxNormalizedYieldsPerSpinIteration = 0; +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// YieldProcessorNormalized + +// Defaults are for when InitializeYieldProcessorNormalized has not yet been called or when no measurement is done, and are +// tuned for Skylake processors +int g_yieldsPerNormalizedYield = 1; // 9 for pre-Skylake +int g_optimalMaxNormalizedYieldsPerSpinIteration = 7; + +static Volatile s_isYieldProcessorNormalizedInitialized = false; -void Thread::InitializeYieldProcessorNormalized() +void InitializeYieldProcessorNormalized() { LIMITED_METHOD_CONTRACT; CrstHolder lock(&s_initializeYieldProcessorNormalizedCrst); - if (IsYieldProcessorNormalizedInitialized()) + if (s_isYieldProcessorNormalizedInitialized) { return; } // Intel pre-Skylake processor: measured typically 14-17 cycles per yield // Intel post-Skylake processor: measured typically 125-150 cycles per yield - const int DefaultYieldsPerNormalizedYield = 1; // defaults are for when no measurement is done - const int DefaultOptimalMaxNormalizedYieldsPerSpinIteration = 64; // tuned for pre-Skylake processors, for post-Skylake it should be 7 const int MeasureDurationMs = 10; const int MaxYieldsPerNormalizedYield = 10; // measured typically 8-9 on pre-Skylake const int MinNsPerNormalizedYield = 37; // measured typically 37-46 on post-Skylake @@ -11776,8 +11782,7 @@ void Thread::InitializeYieldProcessorNormalized() if (!QueryPerformanceFrequency(&li) || (ULONGLONG)li.QuadPart < 1000 / MeasureDurationMs) { // High precision clock not available or clock resolution is too low, resort 
to defaults - s_yieldsPerNormalizedYield = DefaultYieldsPerNormalizedYield; - s_optimalMaxNormalizedYieldsPerSpinIteration = DefaultOptimalMaxNormalizedYieldsPerSpinIteration; + s_isYieldProcessorNormalizedInitialized = true; return; } ULONGLONG ticksPerSecond = li.QuadPart; @@ -11827,6 +11832,17 @@ void Thread::InitializeYieldProcessorNormalized() optimalMaxNormalizedYieldsPerSpinIteration = 1; } - s_yieldsPerNormalizedYield = yieldsPerNormalizedYield; - s_optimalMaxNormalizedYieldsPerSpinIteration = optimalMaxNormalizedYieldsPerSpinIteration; + g_yieldsPerNormalizedYield = yieldsPerNormalizedYield; + g_optimalMaxNormalizedYieldsPerSpinIteration = optimalMaxNormalizedYieldsPerSpinIteration; + s_isYieldProcessorNormalizedInitialized = true; +} + +void EnsureYieldProcessorNormalizedInitialized() +{ + WRAPPER_NO_CONTRACT; + + if (!s_isYieldProcessorNormalizedInitialized) + { + InitializeYieldProcessorNormalized(); + } } diff --git a/src/vm/threads.h b/src/vm/threads.h index 4000f216f442..c8fe9cfb5331 100644 --- a/src/vm/threads.h +++ b/src/vm/threads.h @@ -5362,71 +5362,6 @@ class Thread: public IUnknown m_HijackReturnKind = returnKind; } #endif // FEATURE_HIJACK - -private: - static CrstStatic s_initializeYieldProcessorNormalizedCrst; - static int s_yieldsPerNormalizedYield; - static int s_optimalMaxNormalizedYieldsPerSpinIteration; - -private: - static void InitializeYieldProcessorNormalized(); - -public: - static bool IsYieldProcessorNormalizedInitialized() - { - LIMITED_METHOD_CONTRACT; - return s_yieldsPerNormalizedYield != 0 && s_optimalMaxNormalizedYieldsPerSpinIteration != 0; - } - -public: - static void EnsureYieldProcessorNormalizedInitialized() - { - LIMITED_METHOD_CONTRACT; - - if (!IsYieldProcessorNormalizedInitialized()) - { - InitializeYieldProcessorNormalized(); - } - } - -public: - static int GetOptimalMaxNormalizedYieldsPerSpinIteration() - { - WRAPPER_NO_CONTRACT; - _ASSERTE(IsYieldProcessorNormalizedInitialized()); - - return 
s_optimalMaxNormalizedYieldsPerSpinIteration; - } - -public: - static void YieldProcessorNormalized() - { - WRAPPER_NO_CONTRACT; - _ASSERTE(IsYieldProcessorNormalizedInitialized()); - - int n = s_yieldsPerNormalizedYield; - while (--n >= 0) - { - YieldProcessor(); - } - } - - static void YieldProcessorNormalizedWithBackOff(unsigned int spinIteration) - { - WRAPPER_NO_CONTRACT; - _ASSERTE(IsYieldProcessorNormalizedInitialized()); - - int n = s_optimalMaxNormalizedYieldsPerSpinIteration; - if (spinIteration <= 30 && (1 << spinIteration) < n) - { - n = 1 << spinIteration; - } - n *= s_yieldsPerNormalizedYield; - while (--n >= 0) - { - YieldProcessor(); - } - } }; // End of class Thread @@ -7573,4 +7508,71 @@ class ThreadStateNCStackHolder BOOL Debug_IsLockedViaThreadSuspension(); +//////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////// +// YieldProcessorNormalized + +extern int g_yieldsPerNormalizedYield; +extern int g_optimalMaxNormalizedYieldsPerSpinIteration; + +void InitializeYieldProcessorNormalized(); +void EnsureYieldProcessorNormalizedInitialized(); + +class YieldProcessorNormalizationInfo +{ +private: + int m_yieldProcessorIterations; + + YieldProcessorNormalizationInfo(int yieldsPerNormalizedYield) : m_yieldProcessorIterations(yieldsPerNormalizedYield) + { + } + + YieldProcessorNormalizationInfo( + int yieldsPerNormalizedYield, + int optimalMaxNormalizedYieldsPerSpinIteration, + unsigned int spinIteration) + : + m_yieldProcessorIterations( + ( + spinIteration <= 30 && (1 << spinIteration) < optimalMaxNormalizedYieldsPerSpinIteration + ? 
1 << spinIteration + : optimalMaxNormalizedYieldsPerSpinIteration + ) * yieldsPerNormalizedYield) + { + } + +public: + int GetYieldProcessorIterations() const + { + return m_yieldProcessorIterations; + } + + static YieldProcessorNormalizationInfo GetNormalizationInfo() + { + LIMITED_METHOD_CONTRACT; + return YieldProcessorNormalizationInfo(g_yieldsPerNormalizedYield); + } + + static YieldProcessorNormalizationInfo GetNormalizationInfoForBackOff(int spinIteration) + { + LIMITED_METHOD_CONTRACT; + + return + YieldProcessorNormalizationInfo( + g_yieldsPerNormalizedYield, + g_optimalMaxNormalizedYieldsPerSpinIteration, + spinIteration); + } +}; + +inline void YieldProcessorNormalized(const YieldProcessorNormalizationInfo &normalizationInfo) +{ + LIMITED_METHOD_CONTRACT; + + int n = normalizationInfo.GetYieldProcessorIterations(); + while (--n >= 0) + { + YieldProcessor(); + } +} + #endif //__threads_h__