Skip to content

Commit

Permalink
Move initialization of YieldProcessorNormalized to the finalizer thread
Browse files Browse the repository at this point in the history
Fixes https://github.com/dotnet/coreclr/issues/13984
- Also moved relevant functions out of the Thread class as requested in the issue
- For some reason, after moving the functions out of the Thread class, YieldProcessorNormalized was not getting inlined anymore. It seems to be important to have it be inlined so that the memory loads are hoisted out of outer loops. To remove the dependency on the compiler to do it (even with forceinline it's sometimes not possible to hoist, for instance in InterlockedCompareExchange loops), changed the signatures to do what is intended.
  • Loading branch information
kouvel committed Sep 19, 2017
1 parent 7f2b64a commit 5cb8253
Show file tree
Hide file tree
Showing 5 changed files with 107 additions and 85 deletions.
4 changes: 2 additions & 2 deletions src/mscorlib/src/Internal/Runtime/Augments/RuntimeThread.cs
Original file line number Diff line number Diff line change
Expand Up @@ -207,8 +207,8 @@ internal static int OptimalMaxSpinWaitsPerSpinIteration
}

// This is done lazily because the first call to the function below in the process triggers a measurement that
// takes a nontrivial amount of time. See Thread::InitializeYieldProcessorNormalized(), which describes and
// calculates this value.
// takes a nontrivial amount of time if the measurement has not already been done in the background.
// See InitializeYieldProcessorNormalized(), which describes and calculates this value.
s_optimalMaxSpinWaitsPerSpinIteration = GetOptimalMaxSpinWaitsPerSpinIterationInternal();
Debug.Assert(s_optimalMaxSpinWaitsPerSpinIteration > 0);
return s_optimalMaxSpinWaitsPerSpinIteration;
Expand Down
14 changes: 8 additions & 6 deletions src/vm/comsynchronizable.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1632,8 +1632,9 @@ INT32 QCALLTYPE ThreadNative::GetOptimalMaxSpinWaitsPerSpinIteration()

BEGIN_QCALL;

Thread::EnsureYieldProcessorNormalizedInitialized();
optimalMaxNormalizedYieldsPerSpinIteration = Thread::GetOptimalMaxNormalizedYieldsPerSpinIteration();
// RuntimeThread calls this function only once lazily and caches the result, so ensure initialization
EnsureYieldProcessorNormalizedInitialized();
optimalMaxNormalizedYieldsPerSpinIteration = g_optimalMaxNormalizedYieldsPerSpinIteration;

END_QCALL;

Expand All @@ -1655,10 +1656,11 @@ FCIMPL1(void, ThreadNative::SpinWait, int iterations)
// spinning for less than that number of cycles, then switching to preemptive
// mode won't help a GC start any faster.
//
if (iterations <= 100000 && Thread::IsYieldProcessorNormalizedInitialized())
if (iterations <= 100000)
{
YieldProcessorNormalizationInfo normalizationInfo = YieldProcessorNormalizationInfo::GetNormalizationInfo();
for (int i = 0; i < iterations; i++)
Thread::YieldProcessorNormalized();
YieldProcessorNormalized(normalizationInfo);
return;
}

Expand All @@ -1668,9 +1670,9 @@ FCIMPL1(void, ThreadNative::SpinWait, int iterations)
HELPER_METHOD_FRAME_BEGIN_NOPOLL();
GCX_PREEMP();

Thread::EnsureYieldProcessorNormalizedInitialized();
YieldProcessorNormalizationInfo normalizationInfo = YieldProcessorNormalizationInfo::GetNormalizationInfo();
for (int i = 0; i < iterations; i++)
Thread::YieldProcessorNormalized();
YieldProcessorNormalized(normalizationInfo);

HELPER_METHOD_FRAME_END();
}
Expand Down
2 changes: 2 additions & 0 deletions src/vm/finalizerthread.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -744,6 +744,8 @@ DWORD WINAPI FinalizerThread::FinalizerThreadStart(void *args)
#endif
GetFinalizerThread()->SetBackground(TRUE);

EnsureYieldProcessorNormalizedInitialized();

#ifdef FEATURE_PROFAPI_ATTACH_DETACH
// Add the Profiler Attach Event to the array of event handles that the
// finalizer thread waits on. If the process is not enabled for profiler
Expand Down
40 changes: 28 additions & 12 deletions src/vm/threads.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ SPTR_IMPL(ThreadStore, ThreadStore, s_pThreadStore);
CONTEXT *ThreadStore::s_pOSContext = NULL;
CLREvent *ThreadStore::s_pWaitForStackCrawlEvent;

static CrstStatic s_initializeYieldProcessorNormalizedCrst;

#ifndef DACCESS_COMPILE


Expand Down Expand Up @@ -1363,7 +1365,7 @@ void InitThreadManager()
}
CONTRACTL_END;

Thread::s_initializeYieldProcessorNormalizedCrst.Init(CrstLeafLock);
s_initializeYieldProcessorNormalizedCrst.Init(CrstLeafLock);

// All patched helpers should fit into one page.
// If you hit this assert on retail build, there is most likely problem with BBT script.
Expand Down Expand Up @@ -11747,25 +11749,29 @@ ULONGLONG Thread::QueryThreadProcessorUsage()
}
#endif // FEATURE_APPDOMAIN_RESOURCE_MONITORING

CrstStatic Thread::s_initializeYieldProcessorNormalizedCrst;
int Thread::s_yieldsPerNormalizedYield = 0;
int Thread::s_optimalMaxNormalizedYieldsPerSpinIteration = 0;
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// YieldProcessorNormalized

// Defaults are for when InitializeYieldProcessorNormalized has not yet been called or when no measurement is done, and are
// tuned for Skylake processors
int g_yieldsPerNormalizedYield = 1; // 9 for pre-Skylake
int g_optimalMaxNormalizedYieldsPerSpinIteration = 7;

static Volatile<bool> s_isYieldProcessorNormalizedInitialized = false;

void Thread::InitializeYieldProcessorNormalized()
void InitializeYieldProcessorNormalized()
{
LIMITED_METHOD_CONTRACT;

CrstHolder lock(&s_initializeYieldProcessorNormalizedCrst);

if (IsYieldProcessorNormalizedInitialized())
if (s_isYieldProcessorNormalizedInitialized)
{
return;
}

// Intel pre-Skylake processor: measured typically 14-17 cycles per yield
// Intel post-Skylake processor: measured typically 125-150 cycles per yield
const int DefaultYieldsPerNormalizedYield = 1; // defaults are for when no measurement is done
const int DefaultOptimalMaxNormalizedYieldsPerSpinIteration = 64; // tuned for pre-Skylake processors, for post-Skylake it should be 7
const int MeasureDurationMs = 10;
const int MaxYieldsPerNormalizedYield = 10; // measured typically 8-9 on pre-Skylake
const int MinNsPerNormalizedYield = 37; // measured typically 37-46 on post-Skylake
Expand All @@ -11776,8 +11782,7 @@ void Thread::InitializeYieldProcessorNormalized()
if (!QueryPerformanceFrequency(&li) || (ULONGLONG)li.QuadPart < 1000 / MeasureDurationMs)
{
// High precision clock not available or clock resolution is too low, resort to defaults
s_yieldsPerNormalizedYield = DefaultYieldsPerNormalizedYield;
s_optimalMaxNormalizedYieldsPerSpinIteration = DefaultOptimalMaxNormalizedYieldsPerSpinIteration;
s_isYieldProcessorNormalizedInitialized = true;
return;
}
ULONGLONG ticksPerSecond = li.QuadPart;
Expand Down Expand Up @@ -11827,6 +11832,17 @@ void Thread::InitializeYieldProcessorNormalized()
optimalMaxNormalizedYieldsPerSpinIteration = 1;
}

s_yieldsPerNormalizedYield = yieldsPerNormalizedYield;
s_optimalMaxNormalizedYieldsPerSpinIteration = optimalMaxNormalizedYieldsPerSpinIteration;
g_yieldsPerNormalizedYield = yieldsPerNormalizedYield;
g_optimalMaxNormalizedYieldsPerSpinIteration = optimalMaxNormalizedYieldsPerSpinIteration;
s_isYieldProcessorNormalizedInitialized = true;
}

// Makes sure the YieldProcessorNormalized measurement has been attempted before returning.
//
// The flag is first read here without taking any lock, so callers that arrive after initialization return immediately.
// Otherwise InitializeYieldProcessorNormalized() is called; it acquires s_initializeYieldProcessorNormalizedCrst and
// re-checks the flag itself before doing any work.
void EnsureYieldProcessorNormalizedInitialized()
{
    WRAPPER_NO_CONTRACT;

    if (s_isYieldProcessorNormalizedInitialized)
    {
        // Already initialized, nothing to do
        return;
    }

    InitializeYieldProcessorNormalized();
}
132 changes: 67 additions & 65 deletions src/vm/threads.h
Original file line number Diff line number Diff line change
Expand Up @@ -5362,71 +5362,6 @@ class Thread: public IUnknown
m_HijackReturnKind = returnKind;
}
#endif // FEATURE_HIJACK

private:
static CrstStatic s_initializeYieldProcessorNormalizedCrst;
static int s_yieldsPerNormalizedYield;
static int s_optimalMaxNormalizedYieldsPerSpinIteration;

private:
static void InitializeYieldProcessorNormalized();

public:
// Reports whether both normalization statics currently hold nonzero values. A zero in either
// s_yieldsPerNormalizedYield or s_optimalMaxNormalizedYieldsPerSpinIteration is treated as "not initialized".
static bool IsYieldProcessorNormalizedInitialized()
{
    LIMITED_METHOD_CONTRACT;

    if (s_yieldsPerNormalizedYield == 0)
    {
        return false;
    }
    return s_optimalMaxNormalizedYieldsPerSpinIteration != 0;
}

public:
// Runs InitializeYieldProcessorNormalized() unless IsYieldProcessorNormalizedInitialized() already reports that the
// normalization values have been set.
static void EnsureYieldProcessorNormalizedInitialized()
{
    LIMITED_METHOD_CONTRACT;

    if (IsYieldProcessorNormalizedInitialized())
    {
        // Values are already in place, skip the initialization call
        return;
    }

    InitializeYieldProcessorNormalized();
}

public:
// Returns the current value of s_optimalMaxNormalizedYieldsPerSpinIteration. Asserts that
// IsYieldProcessorNormalizedInitialized() is true, so callers are expected to have ensured initialization first.
static int GetOptimalMaxNormalizedYieldsPerSpinIteration()
{
    WRAPPER_NO_CONTRACT;
    _ASSERTE(IsYieldProcessorNormalizedInitialized());

    const int optimalMaxNormalizedYields = s_optimalMaxNormalizedYieldsPerSpinIteration;
    return optimalMaxNormalizedYields;
}

public:
// Performs one normalized yield: calls YieldProcessor() s_yieldsPerNormalizedYield times. Asserts that
// IsYieldProcessorNormalizedInitialized() is true.
static void YieldProcessorNormalized()
{
    WRAPPER_NO_CONTRACT;
    _ASSERTE(IsYieldProcessorNormalizedInitialized());

    // The static is read once up front; the loop then counts down on the local copy
    for (int remainingYields = s_yieldsPerNormalizedYield; remainingYields > 0; --remainingYields)
    {
        YieldProcessor();
    }
}

// Yields with exponential back-off. The number of normalized yields is 2^spinIteration, capped at
// s_optimalMaxNormalizedYieldsPerSpinIteration, and each normalized yield is s_yieldsPerNormalizedYield calls to
// YieldProcessor(). Asserts that IsYieldProcessorNormalizedInitialized() is true.
//
// spinIteration - zero-based index of the current spin iteration; values above 30 always use the cap
static void YieldProcessorNormalizedWithBackOff(unsigned int spinIteration)
{
    WRAPPER_NO_CONTRACT;
    _ASSERTE(IsYieldProcessorNormalizedInitialized());

    int normalizedYields = s_optimalMaxNormalizedYieldsPerSpinIteration;

    // Only shift when the shift count is at most 30, and only take the shifted value when it is below the cap
    if (spinIteration <= 30)
    {
        const int exponentialYields = 1 << spinIteration;
        if (exponentialYields < normalizedYields)
        {
            normalizedYields = exponentialYields;
        }
    }

    for (int remainingYields = normalizedYields * s_yieldsPerNormalizedYield; remainingYields > 0; --remainingYields)
    {
        YieldProcessor();
    }
}
};

// End of class Thread
Expand Down Expand Up @@ -7573,4 +7508,71 @@ class ThreadStateNCStackHolder

BOOL Debug_IsLockedViaThreadSuspension();

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// YieldProcessorNormalized

extern int g_yieldsPerNormalizedYield;
extern int g_optimalMaxNormalizedYieldsPerSpinIteration;

void InitializeYieldProcessorNormalized();
void EnsureYieldProcessorNormalizedInitialized();

class YieldProcessorNormalizationInfo
{
private:
int m_yieldProcessorIterations;

YieldProcessorNormalizationInfo(int yieldsPerNormalizedYield) : m_yieldProcessorIterations(yieldsPerNormalizedYield)
{
}

YieldProcessorNormalizationInfo(
int yieldsPerNormalizedYield,
int optimalMaxNormalizedYieldsPerSpinIteration,
unsigned int spinIteration)
:
m_yieldProcessorIterations(
(
spinIteration <= 30 && (1 << spinIteration) < optimalMaxNormalizedYieldsPerSpinIteration
? 1 << spinIteration
: optimalMaxNormalizedYieldsPerSpinIteration
) * yieldsPerNormalizedYield)
{
}

public:
int GetYieldProcessorIterations() const
{
return m_yieldProcessorIterations;
}

static YieldProcessorNormalizationInfo GetNormalizationInfo()
{
LIMITED_METHOD_CONTRACT;
return YieldProcessorNormalizationInfo(g_yieldsPerNormalizedYield);
}

static YieldProcessorNormalizationInfo GetNormalizationInfoForBackOff(int spinIteration)
{
LIMITED_METHOD_CONTRACT;

return
YieldProcessorNormalizationInfo(
g_yieldsPerNormalizedYield,
g_optimalMaxNormalizedYieldsPerSpinIteration,
spinIteration);
}
};

// Performs one normalized yield by calling YieldProcessor() the number of times recorded in normalizationInfo. The count
// is read from normalizationInfo once, before the loop starts.
//
// normalizationInfo - obtained from YieldProcessorNormalizationInfo::GetNormalizationInfo() or
//                     YieldProcessorNormalizationInfo::GetNormalizationInfoForBackOff()
FORCEINLINE void YieldProcessorNormalized(const YieldProcessorNormalizationInfo &normalizationInfo)
{
    LIMITED_METHOD_CONTRACT;

    for (int remainingYields = normalizationInfo.GetYieldProcessorIterations(); remainingYields > 0; --remainingYields)
    {
        YieldProcessor();
    }
}

#endif //__threads_h__

0 comments on commit 5cb8253

Please sign in to comment.