From 8bed8e99deab68e54f63e4a56d8516c60c9e19b5 Mon Sep 17 00:00:00 2001 From: Koundinya Veluri Date: Thu, 12 Oct 2017 08:25:17 -0700 Subject: [PATCH] Clean up YieldProcessorNormalized --- src/vm/comsynchronizable.cpp | 8 ++--- src/vm/syncblk.cpp | 4 +-- src/vm/syncblk.h | 2 +- src/vm/syncblk.inl | 2 +- src/vm/synch.cpp | 2 +- src/vm/yieldprocessornormalized.cpp | 21 ++++++------- src/vm/yieldprocessornormalized.h | 49 +++++++++++++++++++---------- 7 files changed, 49 insertions(+), 39 deletions(-) diff --git a/src/vm/comsynchronizable.cpp b/src/vm/comsynchronizable.cpp index 1d7541a74a49..472ca34feb0a 100644 --- a/src/vm/comsynchronizable.cpp +++ b/src/vm/comsynchronizable.cpp @@ -1658,9 +1658,7 @@ FCIMPL1(void, ThreadNative::SpinWait, int iterations) // if (iterations <= 100000) { - YieldProcessorNormalizationInfo normalizationInfo; - for (int i = 0; i < iterations; i++) - YieldProcessorNormalized(normalizationInfo); + YieldProcessorNormalized(YieldProcessorNormalizationInfo(), iterations); return; } @@ -1670,9 +1668,7 @@ FCIMPL1(void, ThreadNative::SpinWait, int iterations) HELPER_METHOD_FRAME_BEGIN_NOPOLL(); GCX_PREEMP(); - YieldProcessorNormalizationInfo normalizationInfo; - for (int i = 0; i < iterations; i++) - YieldProcessorNormalized(normalizationInfo); + YieldProcessorNormalized(YieldProcessorNormalizationInfo(), iterations); HELPER_METHOD_FRAME_END(); } diff --git a/src/vm/syncblk.cpp b/src/vm/syncblk.cpp index be5baacda128..015f7f1cf06a 100644 --- a/src/vm/syncblk.cpp +++ b/src/vm/syncblk.cpp @@ -1917,7 +1917,7 @@ AwareLock::EnterHelperResult ObjHeader::EnterObjMonitorHelperSpin(Thread* pCurTh return AwareLock::EnterHelperResult_Contention; } - YieldProcessorWithBackOffNormalizationInfo normalizationInfo; + YieldProcessorNormalizationInfo normalizationInfo; const DWORD spinCount = g_SpinConstants.dwMonitorSpinCount; for (DWORD spinIteration = 0; spinIteration < spinCount; ++spinIteration) { @@ -3189,7 +3189,7 @@ BOOL AwareLock::EnterEpilogHelper(Thread* pCurThread, INT32 timeOut) if (g_SystemInfo.dwNumberOfProcessors > 1) { bool acquiredLock = false; - YieldProcessorWithBackOffNormalizationInfo normalizationInfo; + YieldProcessorNormalizationInfo normalizationInfo; const DWORD spinCount = g_SpinConstants.dwMonitorSpinCount; for (DWORD spinIteration = 0; spinIteration < spinCount; ++spinIteration) { diff --git a/src/vm/syncblk.h b/src/vm/syncblk.h index b90238df9349..e9c586684915 100644 --- a/src/vm/syncblk.h +++ b/src/vm/syncblk.h @@ -486,7 +486,7 @@ class AwareLock } public: - static void SpinWait(const YieldProcessorWithBackOffNormalizationInfo &normalizationInfo, DWORD spinIteration); + static void SpinWait(const YieldProcessorNormalizationInfo &normalizationInfo, DWORD spinIteration); // Helper encapsulating the fast path entering monitor. Returns what kind of result was achieved. bool TryEnterHelper(Thread* pCurThread); diff --git a/src/vm/syncblk.inl b/src/vm/syncblk.inl index a7482a3c7c60..8f4f43b50aa3 100644 --- a/src/vm/syncblk.inl +++ b/src/vm/syncblk.inl @@ -269,7 +269,7 @@ FORCEINLINE bool AwareLock::LockState::InterlockedObserveWakeSignal_Try_LockAndU return false; } -FORCEINLINE void AwareLock::SpinWait(const YieldProcessorWithBackOffNormalizationInfo &normalizationInfo, DWORD spinIteration) +FORCEINLINE void AwareLock::SpinWait(const YieldProcessorNormalizationInfo &normalizationInfo, DWORD spinIteration) { WRAPPER_NO_CONTRACT; diff --git a/src/vm/synch.cpp b/src/vm/synch.cpp index 31ed23546afe..c21e4f53a005 100644 --- a/src/vm/synch.cpp +++ b/src/vm/synch.cpp @@ -841,7 +841,7 @@ bool CLRLifoSemaphore::Wait(DWORD timeoutMs, UINT32 spinCount, UINT32 processorC } #else // !_TARGET_ARM64_ const UINT32 Sleep0Threshold = 10; - YieldProcessorWithBackOffNormalizationInfo normalizationInfo; + YieldProcessorNormalizationInfo normalizationInfo; #ifdef FEATURE_PAL // The PAL's wait subsystem is quite slow, spin more to compensate for the more expensive wait spinCount *= 2; diff --git a/src/vm/yieldprocessornormalized.cpp b/src/vm/yieldprocessornormalized.cpp index 9535fd4146ae..96d8769cbe97 100644 --- a/src/vm/yieldprocessornormalized.cpp +++ b/src/vm/yieldprocessornormalized.cpp @@ -18,7 +18,7 @@ void InitializeYieldProcessorNormalizedCrst() s_initializeYieldProcessorNormalizedCrst.Init(CrstLeafLock); } -void InitializeYieldProcessorNormalized() +static void InitializeYieldProcessorNormalized() { WRAPPER_NO_CONTRACT; @@ -32,12 +32,13 @@ void InitializeYieldProcessorNormalized() // Intel pre-Skylake processor: measured typically 14-17 cycles per yield // Intel post-Skylake processor: measured typically 125-150 cycles per yield const int MeasureDurationMs = 10; - const int MaxYieldsPerNormalizedYield = 10; // measured typically 8-9 on pre-Skylake - const int MinNsPerNormalizedYield = 37; // measured typically 37-46 on post-Skylake - const int NsPerOptimialMaxSpinIterationDuration = 272; // approx. 900 cycles, measured 281 on pre-Skylake, 263 on post-Skylake - const int MaxOptimalMaxNormalizedYieldsPerSpinIteration = 10; + const int NsPerOptimalMaxSpinIterationDuration = 272; // approx. 900 cycles, measured 281 on pre-Skylake, 263 on post-Skylake const int NsPerSecond = 1000 * 1000 * 1000; + // If this constant is changed, the shift value in YieldProcessorWithBackOffNormalized() should be changed as well + const int MaxOptimalMaxNormalizedYieldsPerSpinIteration = 10; + static_assert_no_msg((1 << 4) >= MaxOptimalMaxNormalizedYieldsPerSpinIteration); + LARGE_INTEGER li; if (!QueryPerformanceFrequency(&li) || (ULONGLONG)li.QuadPart < 1000 / MeasureDurationMs) { @@ -74,22 +75,20 @@ void InitializeYieldProcessorNormalized() nsPerYield = 1; } - // Calculate the number of yields required to span the duration of a normalized yield + // Calculate the number of yields required to span the duration of a normalized yield. Since nsPerYield is at least 1, this + // value is naturally limited to MinNsPerNormalizedYield. int yieldsPerNormalizedYield = (int)(MinNsPerNormalizedYield / nsPerYield + 0.5); if (yieldsPerNormalizedYield < 1) { yieldsPerNormalizedYield = 1; } - else if (yieldsPerNormalizedYield > MaxYieldsPerNormalizedYield) - { - yieldsPerNormalizedYield = MaxYieldsPerNormalizedYield; - } + _ASSERTE(yieldsPerNormalizedYield <= MinNsPerNormalizedYield); // Calculate the maximum number of yields that would be optimal for a late spin iteration. Typically, we would not want to // spend excessive amounts of time (thousands of cycles) doing only YieldProcessor, as SwitchToThread/Sleep would do a // better job of allowing other work to run. int optimalMaxNormalizedYieldsPerSpinIteration = - (int)(NsPerOptimialMaxSpinIterationDuration / (yieldsPerNormalizedYield * nsPerYield) + 0.5); + (int)(NsPerOptimalMaxSpinIterationDuration / (yieldsPerNormalizedYield * nsPerYield) + 0.5); if (optimalMaxNormalizedYieldsPerSpinIteration < 1) { optimalMaxNormalizedYieldsPerSpinIteration = 1; diff --git a/src/vm/yieldprocessornormalized.h b/src/vm/yieldprocessornormalized.h index 26fd4492c173..58b9149fc7cf 100644 --- a/src/vm/yieldprocessornormalized.h +++ b/src/vm/yieldprocessornormalized.h @@ -4,24 +4,32 @@ #pragma once +const unsigned int MinNsPerNormalizedYield = 37; // measured typically 37-46 on post-Skylake + extern unsigned int g_yieldsPerNormalizedYield; extern unsigned int g_optimalMaxNormalizedYieldsPerSpinIteration; void InitializeYieldProcessorNormalizedCrst(); -void InitializeYieldProcessorNormalized(); void EnsureYieldProcessorNormalizedInitialized(); class YieldProcessorNormalizationInfo { private: - int yieldsPerNormalizedYield; + unsigned int yieldsPerNormalizedYield; + unsigned int optimalMaxNormalizedYieldsPerSpinIteration; + unsigned int optimalMaxYieldsPerSpinIteration; public: - YieldProcessorNormalizationInfo() : yieldsPerNormalizedYield(g_yieldsPerNormalizedYield) + YieldProcessorNormalizationInfo() + : yieldsPerNormalizedYield(g_yieldsPerNormalizedYield), + optimalMaxNormalizedYieldsPerSpinIteration(g_optimalMaxNormalizedYieldsPerSpinIteration), + optimalMaxYieldsPerSpinIteration(yieldsPerNormalizedYield * optimalMaxNormalizedYieldsPerSpinIteration) { } friend void YieldProcessorNormalized(const YieldProcessorNormalizationInfo &); + friend void YieldProcessorNormalized(const YieldProcessorNormalizationInfo &, unsigned int); + friend void YieldProcessorWithBackOffNormalized(const YieldProcessorNormalizationInfo &, unsigned int); }; FORCEINLINE void YieldProcessorNormalized(const YieldProcessorNormalizationInfo &normalizationInfo) @@ -36,31 +44,38 @@ FORCEINLINE void YieldProcessorNormalized(const YieldProcessorNormalizationInfo } while (--n != 0); } -class YieldProcessorWithBackOffNormalizationInfo +FORCEINLINE void YieldProcessorNormalized(const YieldProcessorNormalizationInfo &normalizationInfo, unsigned int count) { -private: - unsigned int yieldsPerNormalizedYield; - unsigned int optimalMaxNormalizedYieldsPerSpinIteration; - unsigned int optimalMaxYieldsPerSpinIteration; + LIMITED_METHOD_CONTRACT; + _ASSERTE(count != 0); -public: - YieldProcessorWithBackOffNormalizationInfo() - : yieldsPerNormalizedYield(g_yieldsPerNormalizedYield), - optimalMaxNormalizedYieldsPerSpinIteration(g_optimalMaxNormalizedYieldsPerSpinIteration), - optimalMaxYieldsPerSpinIteration(yieldsPerNormalizedYield * optimalMaxNormalizedYieldsPerSpinIteration) + if (sizeof(SIZE_T) <= sizeof(unsigned int)) { + // On platforms with a small SIZE_T, prevent overflow on the multiply below. normalizationInfo.yieldsPerNormalizedYield + // is limited to MinNsPerNormalizedYield by InitializeYieldProcessorNormalized(). + const unsigned int MaxCount = (unsigned int)SIZE_T_MAX / MinNsPerNormalizedYield; + if (count > MaxCount) + { + count = MaxCount; + } } - friend void YieldProcessorWithBackOffNormalized(const YieldProcessorWithBackOffNormalizationInfo &, unsigned int); -}; + SIZE_T n = (SIZE_T)count * normalizationInfo.yieldsPerNormalizedYield; + _ASSERTE(n != 0); + do + { + YieldProcessor(); + } while (--n != 0); +} FORCEINLINE void YieldProcessorWithBackOffNormalized( - const YieldProcessorWithBackOffNormalizationInfo &normalizationInfo, + const YieldProcessorNormalizationInfo &normalizationInfo, unsigned int spinIteration) { LIMITED_METHOD_CONTRACT; - _ASSERTE(((unsigned int)1 << 4) >= normalizationInfo.optimalMaxNormalizedYieldsPerSpinIteration); + // The max for the shift value is based on MaxOptimalMaxNormalizedYieldsPerSpinIteration in + // InitializeYieldProcessorNormalized() unsigned int n; if (spinIteration < 4 && ((unsigned int)1 << spinIteration) < normalizationInfo.optimalMaxNormalizedYieldsPerSpinIteration) {