Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize Min/Max paths with AVX10.2 intrinsics #112535

Merged
merged 20 commits into from
Mar 4, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
8abbfd7
Use Avx10.2 MinMax instruction for gtMinNode and gtMaxNode
khushal1996 Jan 28, 2025
f4c7f6c
Add knob for enabling Avx10.2 in debug mode
khushal1996 Jan 29, 2025
9341d72
Use new minmax intrinsics for scalar min max operations
khushal1996 Jan 30, 2025
5e74f5e
Limit JIT unit suite within the subsets which are stable in SDE.
Ruihan-Yin Aug 2, 2024
5569500
Add proper docstrings to explain the new instruction
khushal1996 Jan 30, 2025
bd859b2
delete unnecessary tests for debug
khushal1996 Jan 30, 2025
7ea3a7a
resolve compile errors
khushal1996 Jan 30, 2025
3a73ef7
Resolve error existing on main right now. Error from wrong base type …
khushal1996 Jan 31, 2025
31338ee
Revert "delete unnecessary tests for debug"
khushal1996 Jan 31, 2025
9e87010
Revert "Resolve error existing on main right now. Error from wrong ba…
khushal1996 Feb 6, 2025
14f501c
Move MinMax control byte logic to a separate function
khushal1996 Feb 10, 2025
e895006
Revert "Add knob for enabling Avx10.2 in debug mode"
khushal1996 Feb 10, 2025
6f0e04e
Revert "Limit JIT unit suite within the subsets which are stable in S…
khushal1996 Feb 10, 2025
fdeaa5a
Refactoring
khushal1996 Feb 10, 2025
e707528
Add comments explaining controlByte for minmax
khushal1996 Feb 11, 2025
92e19ff
Run formatting
khushal1996 Feb 13, 2025
43824b5
Use simd hardware intrinsics only when available
khushal1996 Feb 16, 2025
8969e6a
Move things inline and adddress review comments
khushal1996 Feb 20, 2025
fbda438
Merge branch 'main' into kcm-avx102-opt2-public-pr
khushal1996 Feb 20, 2025
938fdb5
Merge branch 'main' into kcm-avx102-opt2-public-pr
khushal1996 Mar 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24366,6 +24366,12 @@ GenTree* Compiler::gtNewSimdMaxNode(
#if defined(TARGET_XARCH)
if (varTypeIsFloating(simdBaseType))
{
if (compOpportunisticallyDependsOn(InstructionSet_AVX10v2))
{
NamedIntrinsic minMaxIntrinsic = (simdSize == 64) ? NI_AVX10v2_V512_MinMax : NI_AVX10v2_MinMax;
return gtNewSimdHWIntrinsicNode(type, op1, op2, gtNewIconNode(0x05), minMaxIntrinsic, simdBaseJitType,
simdSize);
}
GenTree* op1Dup1 = fgMakeMultiUse(&op1);
GenTree* op1Dup2 = gtCloneExpr(op1Dup1);
GenTree* op1Dup3 = gtCloneExpr(op1Dup2);
Expand Down Expand Up @@ -24625,6 +24631,12 @@ GenTree* Compiler::gtNewSimdMinNode(
#if defined(TARGET_XARCH)
if (varTypeIsFloating(simdBaseType))
{
if (compOpportunisticallyDependsOn(InstructionSet_AVX10v2))
{
NamedIntrinsic minMaxIntrinsic = (simdSize == 64) ? NI_AVX10v2_V512_MinMax : NI_AVX10v2_MinMax;
return gtNewSimdHWIntrinsicNode(type, op1, op2, gtNewIconNode(0x04), minMaxIntrinsic, simdBaseJitType,
simdSize);
}
GenTree* op1Dup1 = fgMakeMultiUse(&op1);
GenTree* op1Dup2 = gtCloneExpr(op1Dup1);
GenTree* op1Dup3 = gtCloneExpr(op1Dup2);
Expand Down
36 changes: 36 additions & 0 deletions src/coreclr/jit/importercalls.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9657,6 +9657,42 @@ GenTree* Compiler::impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method,
GenTree* op2 = impImplicitR4orR8Cast(impStackTop().val, callType);
GenTree* op1 = impImplicitR4orR8Cast(impStackTop(1).val, callType);

#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH)
// If AVX10.2 is enabled, the min/max operations can be done using the
// new minmax instruction, which is faster than the combination of
// instructions required on lower ISAs.

if (compOpportunisticallyDependsOn(InstructionSet_AVX10v2))
{
impPopStack();
impPopStack();
/**
* ctrlByte A control byte (imm8) that specifies the type of min/max operation and sign behavior:
* - Bits [1:0] (Op-select): Determines the operation performed:
* - 0b00: minimum - Returns x if x ≤ y, otherwise y; NaN handling applies.
* - 0b01: maximum - Returns x if x ≥ y, otherwise y; NaN handling applies.
* - 0b10: minimumMagnitude - Compares absolute values, returns the smaller magnitude.
* - 0b11: maximumMagnitude - Compares absolute values, returns the larger magnitude.
* - Bit [4] (min/max mode): Selects how NaN operands are handled:
* - 0: minimum/maximum (propagates NaNs).
* - 1: minimumNumber/maximumNumber (returns the numeric operand when only one operand is NaN).
* - Bits [3:2] (Sign control): Defines how the result’s sign is determined:
* - 0b00: Select sign from the first operand (src1).
* - 0b01: Select sign from the comparison result.
* - 0b10: Force result sign to 0 (positive).
* - 0b11: Force result sign to 1 (negative).
*/
uint8_t ctrlByte = 0x04; // Select sign from comparison result
ctrlByte |= isMax ? 0x01 : 0x00;
ctrlByte |= isMagnitude ? 0x02 : 0x00;
ctrlByte |= isNumber ? 0x10 : 0x00;

GenTree* retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, gtNewIconNode(ctrlByte),
NI_AVX10v2_MinMaxScalar, callJitType, 16);
return gtNewSimdToScalarNode(genActualType(callType), retNode, callJitType, 16);
}
#endif // FEATURE_HW_INTRINSICS && TARGET_XARCH

if (op2->IsCnsFltOrDbl())
{
cnsNode = op2->AsDblCon();
Expand Down
Loading