Ensure that math calls into the CRT are tracked as needing vzeroupper (#112011)

* Ensure that math calls into the CRT are tracked as needing vzeroupper

* Apply suggestions from code review

* Don't force vzeroupper for helpers written in managed
tannergooding authored Feb 1, 2025
1 parent bc191c9 commit 6f221b4
Showing 10 changed files with 125 additions and 76 deletions.
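A note on why vzeroupper matters here, before the per-file hunks: the Intel guidance quoted in the gentree.cpp change (optimization manual, section 3.11.5.3) concerns the AVX-to-SSE transition penalty. Once VEX-encoded 256-bit instructions have dirtied the upper halves of the YMM registers, executing legacy-SSE-encoded code can stall until vzeroupper clears that state. CRT math routines reached from JIT-compiled code are opaque callees that may well use legacy SSE encodings internally, which is why math intrinsics that fall back to a native call need the same treatment as P/Invokes. A minimal standalone C++ sketch of the scenario; the function name and loop are invented for illustration and are not part of the commit:

    #include <immintrin.h>
    #include <cmath>
    #include <cstddef>

    double sum_then_sin(const double* data, std::size_t n)
    {
        __m256d acc = _mm256_setzero_pd(); // VEX-encoded AVX work dirties the upper YMM state
        for (std::size_t i = 0; i + 4 <= n; i += 4)
        {
            acc = _mm256_add_pd(acc, _mm256_loadu_pd(data + i));
        }

        double partial[4];
        _mm256_storeu_pd(partial, acc);

        // _mm256_zeroupper(); // the equivalent of what the JIT emits before such calls
        return std::sin(partial[0] + partial[1] + partial[2] + partial[3]); // CRT call, possibly legacy SSE inside
    }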
2 changes: 1 addition & 1 deletion src/coreclr/jit/compiler.cpp
@@ -10156,7 +10156,7 @@ JITDBGAPI void __cdecl cTreeFlags(Compiler* comp, GenTree* tree)
{
chars += printf("[CALL_M_NOGCCHECK]");
}
if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
if (call->IsSpecialIntrinsic())
{
chars += printf("[CALL_M_SPECIAL_INTRINSIC]");
}
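Most hunks in this commit make the same mechanical substitution seen just above: open-coded tests of gtCallMoreFlags against GTF_CALL_M_SPECIAL_INTRINSIC become calls to IsSpecialIntrinsic(). The accessor itself is declared in gentree.h, which this page does not show; a plausible minimal shape, assuming it simply wraps the existing flag, would be:

    // Hypothetical sketch of the GenTreeCall accessor; the real declaration lives in
    // gentree.h and is not part of the hunks shown here.
    bool IsSpecialIntrinsic() const
    {
        return (gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) != 0;
    }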
3 changes: 2 additions & 1 deletion src/coreclr/jit/compiler.h
@@ -4765,7 +4765,8 @@ class Compiler
R2RARG(CORINFO_CONST_LOOKUP* entryPoint),
var_types callType,
NamedIntrinsic intrinsicName,
bool tailCall);
bool tailCall,
bool* isSpecial);
GenTree* impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method,
CORINFO_SIG_INFO* sig,
CorInfoType callJitType,
104 changes: 56 additions & 48 deletions src/coreclr/jit/gentree.cpp
@@ -2005,66 +2005,74 @@ bool GenTreeCall::NeedsVzeroupper(Compiler* comp)
}

bool needsVzeroupper = false;
bool checkSignature = false;

if (IsPInvoke())
{
// The Intel optimization manual guidance in `3.11.5.3 Fixing Instruction Slowdowns` states:
// Insert a VZEROUPPER to tell the hardware that the state of the higher registers is clean
// between the VEX and the legacy SSE instructions. Often the best way to do this is to insert a
// VZEROUPPER before returning from any function that uses VEX (that does not produce a VEX
// register) and before any call to an unknown function.
// The Intel optimization manual guidance in `3.11.5.3 Fixing Instruction Slowdowns` states:
// Insert a VZEROUPPER to tell the hardware that the state of the higher registers is clean
// between the VEX and the legacy SSE instructions. Often the best way to do this is to insert a
// VZEROUPPER before returning from any function that uses VEX (that does not produce a VEX
// register) and before any call to an unknown function.

switch (gtCallType)
switch (gtCallType)
{
case CT_USER_FUNC:
case CT_INDIRECT:
{
case CT_USER_FUNC:
case CT_INDIRECT:
{
// Since P/Invokes are not compiled by the runtime, they are typically "unknown" since they
// may use the legacy encoding. This includes both CT_USER_FUNC and CT_INDIRECT
// Since P/Invokes are not compiled by the runtime, they are typically "unknown" since they
// may use the legacy encoding. This includes both CT_USER_FUNC and CT_INDIRECT

if (IsPInvoke())
{
needsVzeroupper = true;
break;
}

case CT_HELPER:
else if (IsSpecialIntrinsic())
{
// Most helpers are well known to not use any floating-point or SIMD logic internally, but
// a few do exist so we need to ensure they are handled. They are identified by taking or
// returning a floating-point or SIMD type, regardless of how it is actually passed/returned.
checkSignature = true;
}
break;
}

if (varTypeUsesFloatReg(this))
case CT_HELPER:
{
// A few special cases exist that can't be found by signature alone, so we handle
// those explicitly here instead.
needsVzeroupper = IsHelperCall(comp, CORINFO_HELP_BULK_WRITEBARRIER);

// Most other helpers are well known to not use any floating-point or SIMD logic internally, but
// a few do exist so we need to ensure they are handled. They are identified by taking or
// returning a floating-point or SIMD type, regardless of how it is actually passed/returned but
// are excluded if we know they are implemented in managed.
checkSignature = !needsVzeroupper && !IsHelperCall(comp, CORINFO_HELP_DBL2INT_OVF) &&
!IsHelperCall(comp, CORINFO_HELP_DBL2LNG_OVF) &&
!IsHelperCall(comp, CORINFO_HELP_DBL2UINT_OVF) &&
!IsHelperCall(comp, CORINFO_HELP_DBL2ULNG_OVF);
break;
}

default:
{
unreached();
}
}

if (checkSignature)
{
if (varTypeUsesFloatReg(this))
{
needsVzeroupper = true;
}
else
{
for (CallArg& arg : gtArgs.Args())
{
if (varTypeUsesFloatReg(arg.GetSignatureType()))
{
needsVzeroupper = true;
break;
}
else
{
for (CallArg& arg : gtArgs.Args())
{
if (varTypeUsesFloatReg(arg.GetSignatureType()))
{
needsVzeroupper = true;
break;
}
}
}
break;
}

default:
{
unreached();
}
}
}

// Other special cases
//
if (!needsVzeroupper && IsHelperCall(comp, CORINFO_HELP_BULK_WRITEBARRIER))
{
needsVzeroupper = true;
}

return needsVzeroupper;
}
#endif // TARGET_XARCH
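The rewritten NeedsVzeroupper above splits the decision into a per-call-kind policy (P/Invokes always need vzeroupper, CORINFO_HELP_BULK_WRITEBARRIER always needs it, special intrinsics and most other helpers need it only when their signature touches FP/SIMD state, and helpers known to be implemented in managed code, such as the DBL2*_OVF conversions, skip the signature check) plus a shared signature test. A sketch of that signature test as a free-standing helper; SignatureUsesFloatReg is a name invented for this note, not actual JIT code:

    // Sketch: a call is treated as touching FP/SIMD register state if its return type
    // or any declared argument type is allocated to a floating-point register,
    // regardless of how the value is actually passed or returned.
    static bool SignatureUsesFloatReg(GenTreeCall* call)
    {
        if (varTypeUsesFloatReg(call))
        {
            return true; // FP/SIMD return value
        }

        for (CallArg& arg : call->gtArgs.Args())
        {
            if (varTypeUsesFloatReg(arg.GetSignatureType()))
            {
                return true; // FP/SIMD argument
            }
        }

        return false;
    }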
@@ -13708,7 +13716,7 @@ GenTree* Compiler::gtFoldExprCall(GenTreeCall* call)
assert(!call->gtArgs.AreArgsComplete());

// Can only fold calls to special intrinsics.
if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) == 0)
if (!call->IsSpecialIntrinsic())
{
return call;
}
@@ -17675,7 +17683,7 @@ Compiler::TypeProducerKind Compiler::gtGetTypeProducerKind(GenTree* tree)
return TPK_Handle;
}
}
else if (tree->AsCall()->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
else if (tree->AsCall()->IsSpecialIntrinsic())
{
if (lookupNamedIntrinsic(tree->AsCall()->gtCallMethHnd) == NI_System_Object_GetType)
{
@@ -19135,7 +19143,7 @@ CORINFO_CLASS_HANDLE Compiler::gtGetClassHandle(GenTree* tree, bool* pIsExact, b
case GT_CALL:
{
GenTreeCall* call = obj->AsCall();
if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
if (call->IsSpecialIntrinsic())
{
NamedIntrinsic ni = lookupNamedIntrinsic(call->gtCallMethHnd);
if ((ni == NI_System_Array_Clone) || (ni == NI_System_Object_MemberwiseClone))
2 changes: 1 addition & 1 deletion src/coreclr/jit/helperexpansion.cpp
@@ -1605,7 +1605,7 @@ PhaseStatus Compiler::fgVNBasedIntrinsicExpansion()
//
bool Compiler::fgVNBasedIntrinsicExpansionForCall(BasicBlock** pBlock, Statement* stmt, GenTreeCall* call)
{
if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) == 0)
if (!call->IsSpecialIntrinsic())
{
return false;
}
35 changes: 26 additions & 9 deletions src/coreclr/jit/importercalls.cpp
@@ -1490,7 +1490,7 @@ var_types Compiler::impImportCall(OPCODE opcode,
{
spillStack = false;
}
else if ((callNode->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) != 0)
else if (callNode->IsSpecialIntrinsic())
{
spillStack = false;
}
@@ -4090,7 +4090,7 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
if (impStackTop().val->OperIs(GT_RET_EXPR))
{
GenTreeCall* call = impStackTop().val->AsRetExpr()->gtInlineCandidate->AsCall();
if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
if (call->IsSpecialIntrinsic())
{
if (lookupNamedIntrinsic(call->gtCallMethHnd) == NI_System_Threading_Thread_get_CurrentThread)
{
@@ -4336,7 +4336,7 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
case NI_System_Math_Log2:
case NI_System_Math_Log10:
{
retNode = impMathIntrinsic(method, sig R2RARG(entryPoint), callType, ni, tailCall);
retNode = impMathIntrinsic(method, sig R2RARG(entryPoint), callType, ni, tailCall, &isSpecial);
break;
}

@@ -4429,7 +4429,7 @@ GenTree* Compiler::impIntrinsic(CORINFO_CLASS_HANDLE clsHnd,
case NI_System_Math_Tanh:
case NI_System_Math_Truncate:
{
retNode = impMathIntrinsic(method, sig R2RARG(entryPoint), callType, ni, tailCall);
retNode = impMathIntrinsic(method, sig R2RARG(entryPoint), callType, ni, tailCall, &isSpecial);
break;
}

@@ -9533,29 +9533,46 @@ GenTree* Compiler::impMathIntrinsic(CORINFO_METHOD_HANDLE method,
CORINFO_SIG_INFO* sig R2RARG(CORINFO_CONST_LOOKUP* entryPoint),
var_types callType,
NamedIntrinsic intrinsicName,
bool tailCall)
bool tailCall,
bool* isSpecial)
{
GenTree* op1;
GenTree* op2;

assert(callType != TYP_STRUCT);
assert(IsMathIntrinsic(intrinsicName));
assert(isSpecial != nullptr);

op1 = nullptr;

bool isIntrinsicImplementedByUserCall = IsIntrinsicImplementedByUserCall(intrinsicName);

if (isIntrinsicImplementedByUserCall)
{
#if defined(TARGET_XARCH)
// We want to track math intrinsics implemented as user calls as special
// to ensure we don't lose track of the fact it will call into native code
//
// This is used on xarch to track that it may need vzeroupper inserted to
// avoid the perf penalty on some hardware.

*isSpecial = true;
#endif // TARGET_XARCH
}

#if !defined(TARGET_X86)
// Intrinsics that are not implemented directly by target instructions will
// be re-materialized as users calls in rationalizer. For prefixed tail calls,
// don't do this optimization, because
// a) For back compatibility reasons on desktop .NET Framework 4.6 / 4.6.1
// b) It will be non-trivial task or too late to re-materialize a surviving
// tail prefixed GT_INTRINSIC as tail call in rationalizer.
if (!IsIntrinsicImplementedByUserCall(intrinsicName) || !tailCall)
if (!isIntrinsicImplementedByUserCall || !tailCall)
#else
// On x86 RyuJIT, importing intrinsics that are implemented as user calls can cause incorrect calculation
// of the depth of the stack if these intrinsics are used as arguments to another call. This causes bad
// code generation for certain EH constructs.
if (!IsIntrinsicImplementedByUserCall(intrinsicName))
if (!isIntrinsicImplementedByUserCall)
#endif
{
CORINFO_ARG_LIST_HANDLE arg = sig->args;
Expand Down Expand Up @@ -9588,7 +9605,7 @@ GenTree* Compiler::impMathIntrinsic(CORINFO_METHOD_HANDLE method,
NO_WAY("Unsupported number of args for Math Intrinsic");
}

if (IsIntrinsicImplementedByUserCall(intrinsicName))
if (isIntrinsicImplementedByUserCall)
{
op1->gtFlags |= GTF_CALL;
}
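The new bool* isSpecial out-parameter lets impMathIntrinsic report that a math intrinsic will survive as a user call into native code (the CRT) on xarch, so the eventual call keeps its special-intrinsic marking and NeedsVzeroupper sees it. A hedged, condensed sketch of the plumbing; the names pIsSpecialIntrinsic and retNode are used here for illustration, and the real flow spans impIntrinsic and impImportCall, which are only partially shown on this page:

    bool     isSpecial = false;
    GenTree* retNode   = impMathIntrinsic(method, sig R2RARG(entryPoint), callType, ni, tailCall, &isSpecial);

    // Propagate the answer upward: when the intrinsic ends up as a user call into the
    // CRT, isSpecial is true, the call is marked GTF_CALL_M_SPECIAL_INTRINSIC, and
    // xarch codegen can insert vzeroupper before it.
    *pIsSpecialIntrinsic = isSpecial;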
@@ -10073,7 +10090,7 @@ GenTree* Compiler::impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method,
#endif // FEATURE_HW_INTRINSICS && TARGET_XARCH

// TODO-CQ: Returning this as an intrinsic blocks inlining and is undesirable
// return impMathIntrinsic(method, sig, callType, intrinsicName, tailCall);
// return impMathIntrinsic(method, sig, callType, intrinsicName, tailCall, isSpecial);

return nullptr;
}
2 changes: 1 addition & 1 deletion src/coreclr/jit/importervectorization.cpp
@@ -351,7 +351,7 @@ GenTreeStrCon* Compiler::impGetStrConFromSpan(GenTree* span)
argCall = span->AsCall();
}

if ((argCall != nullptr) && ((argCall->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) != 0))
if ((argCall != nullptr) && argCall->IsSpecialIntrinsic())
{
const NamedIntrinsic ni = lookupNamedIntrinsic(argCall->gtCallMethHnd);
if ((ni == NI_System_MemoryExtensions_AsSpan) || (ni == NI_System_String_op_Implicit))
2 changes: 1 addition & 1 deletion src/coreclr/jit/lower.cpp
@@ -2817,7 +2817,7 @@ GenTree* Lowering::LowerCall(GenTree* node)

#if defined(TARGET_AMD64) || defined(TARGET_ARM64)
GenTree* nextNode = nullptr;
if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
if (call->IsSpecialIntrinsic())
{
switch (comp->lookupNamedIntrinsic(call->gtCallMethHnd))
{
6 changes: 3 additions & 3 deletions src/coreclr/jit/morph.cpp
@@ -4975,7 +4975,7 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call)
#endif
};

if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
if (call->IsSpecialIntrinsic())
{
failTailCall("Might turn into an intrinsic");
return nullptr;
@@ -6870,7 +6870,7 @@ GenTree* Compiler::fgMorphCall(GenTreeCall* call)
#endif
}

if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) != 0)
if (call->IsSpecialIntrinsic())
{
if (lookupNamedIntrinsic(call->AsCall()->gtCallMethHnd) ==
NI_System_Text_UTF8Encoding_UTF8EncodingSealed_ReadUtf8)
@@ -6959,7 +6959,7 @@ GenTree* Compiler::fgMorphCall(GenTreeCall* call)
// Morph Type.op_Equality, Type.op_Inequality, and Enum.HasFlag
//
// We need to do these before the arguments are morphed
if (!call->gtArgs.AreArgsComplete() && (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC))
if (!call->gtArgs.AreArgsComplete() && call->IsSpecialIntrinsic())
{
// See if this is foldable
GenTree* optTree = gtFoldExprCall(call);
