Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize Min/Max paths with AVX10.2 intrinsics #112535

Merged
merged 20 commits into from
Mar 4, 2025
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
8abbfd7
Use Avx10.2 MinMax instruction for gtMinNode and gtMaxNode
khushal1996 Jan 28, 2025
f4c7f6c
Add knob for enabling Avx10.2 in debug mode
khushal1996 Jan 29, 2025
9341d72
Use new minmax intrinsics for scalar min max operations
khushal1996 Jan 30, 2025
5e74f5e
Limit JIT unit suite within the subsets which are stable in SDE.
Ruihan-Yin Aug 2, 2024
5569500
Add proper docstrings to explain the new instruction
khushal1996 Jan 30, 2025
bd859b2
delete unnecessary tests for debug
khushal1996 Jan 30, 2025
7ea3a7a
resolve compile errors
khushal1996 Jan 30, 2025
3a73ef7
Resolve error existing on main right now. Error from wrong base type …
khushal1996 Jan 31, 2025
31338ee
Revert "delete unnecessary tests for debug"
khushal1996 Jan 31, 2025
9e87010
Revert "Resolve error existing on main right now. Error from wrong ba…
khushal1996 Feb 6, 2025
14f501c
Move MinMax control byte logic to a separate function
khushal1996 Feb 10, 2025
e895006
Revert "Add knob for enabling Avx10.2 in debug mode"
khushal1996 Feb 10, 2025
6f0e04e
Revert "Limit JIT unit suite within the subsets which are stable in S…
khushal1996 Feb 10, 2025
fdeaa5a
Refactoring
khushal1996 Feb 10, 2025
e707528
Add comments explaining controlByte for minmax
khushal1996 Feb 11, 2025
92e19ff
Run formatting
khushal1996 Feb 13, 2025
43824b5
Use simd hardware intrinsics only when available
khushal1996 Feb 16, 2025
8969e6a
Move things inline and adddress review comments
khushal1996 Feb 20, 2025
fbda438
Merge branch 'main' into kcm-avx102-opt2-public-pr
khushal1996 Feb 20, 2025
938fdb5
Merge branch 'main' into kcm-avx102-opt2-public-pr
khushal1996 Mar 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -3521,6 +3521,17 @@ class Compiler
var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize);

#if defined(TARGET_XARCH)
// Creates the hardware-intrinsic node for the AVX10.2 MinMax instruction.
// 'ctrlByte' is the imm8 operand that selects the operation (min/max,
// magnitude, NaN mode, sign control); see gtMinMaxControlByte.
GenTree* gtNewSimdMinMaxNode(var_types type,
GenTree* op1,
GenTree* op2,
ssize_t ctrlByte,
CorInfoType simdBaseJitType,
unsigned simdSize);

// Builds the imm8 control byte consumed by the AVX10.2 MinMax intrinsics.
// With all arguments defaulted the result encodes a plain minimum.
uint8_t gtMinMaxControlByte(bool isMax = false,
bool isMagnitude = false,
bool isNumber = false);

GenTree* gtNewSimdTernaryLogicNode(var_types type,
GenTree* op1,
GenTree* op2,
Expand Down
161 changes: 161 additions & 0 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24404,6 +24404,10 @@ GenTree* Compiler::gtNewSimdMaxNode(
#if defined(TARGET_XARCH)
if (varTypeIsFloating(simdBaseType))
{
if (compOpportunisticallyDependsOn(InstructionSet_AVX10v2))
{
return gtNewSimdMinMaxNode(type, op1, op2, gtMinMaxControlByte(true), simdBaseJitType, simdSize);
}
GenTree* op1Dup1 = fgMakeMultiUse(&op1);
GenTree* op1Dup2 = gtCloneExpr(op1Dup1);
GenTree* op1Dup3 = gtCloneExpr(op1Dup2);
Expand Down Expand Up @@ -24663,6 +24667,10 @@ GenTree* Compiler::gtNewSimdMinNode(
#if defined(TARGET_XARCH)
if (varTypeIsFloating(simdBaseType))
{
if (compOpportunisticallyDependsOn(InstructionSet_AVX10v2))
{
return gtNewSimdMinMaxNode(type, op1, op2, gtMinMaxControlByte(), simdBaseJitType, simdSize);
}
GenTree* op1Dup1 = fgMakeMultiUse(&op1);
GenTree* op1Dup2 = gtCloneExpr(op1Dup1);
GenTree* op1Dup3 = gtCloneExpr(op1Dup2);
Expand All @@ -24687,6 +24695,159 @@ GenTree* Compiler::gtNewSimdMinNode(
return gtNewSimdMinNativeNode(type, op1, op2, simdBaseJitType, simdSize);
}

#if defined(TARGET_XARCH)
/**
 * Builds the hardware-intrinsic node that performs a vector min/max using the AVX10.2 MinMax instruction.
 *
 * The exact operation is chosen by the immediate operand (imm8) passed in ctrlByte:
 *   - Bits [1:0], op-select:
 *       0b00 minimum, 0b01 maximum, 0b10 minimumMagnitude, 0b11 maximumMagnitude.
 *   - Bits [3:2], sign control:
 *       0b00 sign from the first operand, 0b01 sign from the comparison result,
 *       0b10 force sign to 0 (positive), 0b11 force sign to 1 (negative).
 *   - Bit [4], min/max mode:
 *       0 standard min/max (propagates NaNs), 1 number-preferential min/max.
 *
 * @param type            The SIMD type of the result; must match simdSize.
 * @param op1             The first operand; must have type 'type'.
 * @param op2             The second operand; must have type 'type'.
 * @param ctrlByte        The imm8 control value described above.
 * @param simdBaseJitType The element type of the vector; must be a floating-point type.
 * @param simdSize        The size of the SIMD vector in bytes; 64 selects the V512 form of the intrinsic.
 *
 * @return A new GenTree node representing the SIMD min/max operation.
 */
GenTree* Compiler::gtNewSimdMinMaxNode(
    var_types type, GenTree* op1, GenTree* op2, ssize_t ctrlByte, CorInfoType simdBaseJitType, unsigned simdSize)
{
    // The MinMax instructions only exist with AVX10.2, and the 64-byte form
    // additionally needs the baseline Vector512 ISAs.
    assert(IsBaselineSimdIsaSupportedDebugOnly());
    assert(compIsaSupportedDebugOnly(InstructionSet_AVX10v2));
    assert((simdSize != 64) || IsBaselineVector512IsaSupportedDebugOnly());

    // The result type and both operands must agree with the requested vector size.
    assert(varTypeIsSIMD(type));
    assert(getSIMDTypeForSize(simdSize) == type);

    assert(op1 != nullptr);
    assert(op1->TypeIs(type));

    assert(op2 != nullptr);
    assert(op2->TypeIs(type));

    // Only floating-point element types are accepted.
    var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
    assert(varTypeIsArithmetic(simdBaseType));
    assert(varTypeIsFloating(simdBaseType));

    NamedIntrinsic intrinsicId;

    if (simdSize == 64)
    {
        intrinsicId = NI_AVX10v2_V512_MinMax;
    }
    else
    {
        intrinsicId = NI_AVX10v2_MinMax;
    }

    GenTree* immNode = gtNewIconNode(ctrlByte);
    return gtNewSimdHWIntrinsicNode(type, op1, op2, immNode, intrinsicId, simdBaseJitType, simdSize);
}

/**
 * @brief Generates a control byte for a SIMD Min/Max operation.
 *
 * Constructs the control byte (imm8) for the AVX10.2 MinMax operations from the provided flags.
 * The imm8 layout is:
 *   - Bits [1:0] (Op-select): Determines the operation performed:
 *       - 0b00: minimum          - Returns x if x <= y, otherwise y; NaN handling applies.
 *       - 0b01: maximum          - Returns x if x >= y, otherwise y; NaN handling applies.
 *       - 0b10: minimumMagnitude - Compares absolute values, returns the smaller magnitude.
 *       - 0b11: maximumMagnitude - Compares absolute values, returns the larger magnitude.
 *   - Bits [3:2] (Sign control): Defines how the result's sign is determined:
 *       - 0b00: Select sign from the first operand (src1).
 *       - 0b01: Select sign from the comparison result.
 *       - 0b10: Force result sign to 0 (positive).
 *       - 0b11: Force result sign to 1 (negative).
 *   - Bit [4] (min/max mode):
 *       - 0: Standard min/max (propagates NaNs).
 *       - 1: Number-preferential min/max.
 *
 * This function always selects the sign from the comparison result (sign control 0b01), so every
 * value it returns has bit 2 set.
 *
 * @param isMax       Specifies if the operation is Max (true) or Min (false). Sets bit 0.
 * @param isMagnitude Specifies if the comparison is done on the magnitude of the values (true) or on
 *                    the actual signed values (false). Sets bit 1.
 * @param isNumber    Specifies the NaN mode. Sets bit 4.
 *                    - If true, the number-preferential mode is used.
 *                    - If false, the standard mode is used, which propagates NaNs.
 *
 * @return A `uint8_t` value representing the control byte (imm8) for the specified operation.
 *         The returned value can be directly used in SIMD Min/Max instructions.
 */
uint8_t Compiler::gtMinMaxControlByte(bool isMax, bool isMagnitude, bool isNumber)
{
    // Each flag maps to an independent bit of the imm8, so the byte is simply
    // the OR of the selected fields rather than a table of eight constants.
    const unsigned opSelectMaxBit        = 0x01; // bits [1:0], low bit: max instead of min
    const unsigned opSelectMagnitudeBit  = 0x02; // bits [1:0], high bit: compare magnitudes
    const unsigned signFromCompareResult = 0x04; // bits [3:2] = 0b01
    const unsigned numberModeBit         = 0x10; // bit [4]: number-preferential mode

    unsigned ctrlBits = signFromCompareResult;

    if (isMax)
    {
        ctrlBits |= opSelectMaxBit;
    }

    if (isMagnitude)
    {
        ctrlBits |= opSelectMagnitudeBit;
    }

    if (isNumber)
    {
        ctrlBits |= numberModeBit;
    }

    return static_cast<uint8_t>(ctrlBits);
}

#endif // TARGET_XARCH

GenTree* Compiler::gtNewSimdMinNativeNode(
var_types type, GenTree* op1, GenTree* op2, CorInfoType simdBaseJitType, unsigned simdSize)
{
Expand Down
17 changes: 17 additions & 0 deletions src/coreclr/jit/importercalls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9647,6 +9647,23 @@ GenTree* Compiler::impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method,
GenTree* op2 = impImplicitR4orR8Cast(impStackTop().val, callType);
GenTree* op1 = impImplicitR4orR8Cast(impStackTop(1).val, callType);

#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH)
// If AVX10.2 is available, the min/max operation can be done with the new
// MinMax instruction, which is faster than the multi-instruction sequence
// needed on lower ISAs.

if (compOpportunisticallyDependsOn(InstructionSet_AVX10v2))
{
impPopStack();
impPopStack();
uint8_t ctrlByte = gtMinMaxControlByte(isMax, isMagnitude, isNumber);

GenTree* retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(ctrlByte),
NI_AVX10v2_MinMaxScalar, callJitType, 16);
return gtNewSimdToScalarNode(genActualType(callType), retNode, callJitType, 16);
}
#endif // FEATURE_HW_INTRINSICS && TARGET_XARCH

if (op2->IsCnsFltOrDbl())
{
cnsNode = op2->AsDblCon();
Expand Down
Loading