Skip to content

Commit

Permalink
Add VPCLMULQDQ intrinsics (#109137)
Browse files Browse the repository at this point in the history
* add vpclmulqdq intrinsics

* add missing break

* add alternate instruction def for evex encoding

* rename instruction

* whitespace

* re-run thunk generator

* fix AOT instruction sets

* address feedback

* apply formatting patch

* address feedback round 2

* add missing brace

* fix smoketest expected results

* fix suffix order

* handle implied V512 support in AOT

* remove more unnecessary X64 ISA variants

---------

Co-authored-by: Tanner Gooding <[email protected]>
  • Loading branch information
saucecontrol and tannergooding authored Nov 20, 2024
1 parent b85f9f6 commit 08a36ca
Show file tree
Hide file tree
Showing 47 changed files with 775 additions and 359 deletions.
1 change: 1 addition & 0 deletions src/coreclr/inc/clrconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -777,6 +777,7 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableBMI2, W("EnableBMI2"),
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableFMA, W("EnableFMA"), 1, "Allows FMA+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableLZCNT, W("EnableLZCNT"), 1, "Allows LZCNT+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnablePCLMULQDQ, W("EnablePCLMULQDQ"), 1, "Allows PCLMULQDQ+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableVPCLMULQDQ, W("EnableVPCLMULQDQ"), 1, "Allows VPCLMULQDQ+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableMOVBE, W("EnableMOVBE"), 1, "Allows MOVBE+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnablePOPCNT, W("EnablePOPCNT"), 1, "Allows POPCNT+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableSSE, W("EnableSSE"), 1, "Allows SSE+ hardware intrinsics to be disabled")
Expand Down
230 changes: 116 additions & 114 deletions src/coreclr/inc/corinfoinstructionset.h

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions src/coreclr/inc/jiteeversionguid.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID;
#define GUID_DEFINED
#endif // !GUID_DEFINED

constexpr GUID JITEEVersionIdentifier = { /* 381fc250-b8f3-4cee-834e-b0bc682a09f2 */
0x381fc250,
0xb8f3,
0x4cee,
{0x83, 0x4e, 0xb0, 0xbc, 0x68, 0x2a, 0x09, 0xf2}
constexpr GUID JITEEVersionIdentifier = { /* 9014d652-5dc7-4edf-9285-6644d0898fb5 */
0x9014d652,
0x5dc7,
0x4edf,
{0x92, 0x85, 0x66, 0x44, 0xd0, 0x89, 0x8f, 0xb5}
};

//////////////////////////////////////////////////////////////////////////////////////////////////////////
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/inc/readytoruninstructionset.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@ enum ReadyToRunInstructionSet
READYTORUN_INSTRUCTION_Avx10v1_V512=46,
READYTORUN_INSTRUCTION_EVEX=47,
READYTORUN_INSTRUCTION_Apx=48,
READYTORUN_INSTRUCTION_Pclmulqdq_V256=49,
READYTORUN_INSTRUCTION_Pclmulqdq_V512=50,

};

Expand Down
6 changes: 6 additions & 0 deletions src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6172,6 +6172,12 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr,
instructionSetFlags.AddInstructionSet(InstructionSet_PCLMULQDQ);
}

if (JitConfig.EnableVPCLMULQDQ() != 0)
{
instructionSetFlags.AddInstructionSet(InstructionSet_PCLMULQDQ_V256);
instructionSetFlags.AddInstructionSet(InstructionSet_PCLMULQDQ_V512);
}

if (JitConfig.EnablePOPCNT() != 0)
{
instructionSetFlags.AddInstructionSet(InstructionSet_POPCNT);
Expand Down
14 changes: 13 additions & 1 deletion src/coreclr/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,19 @@ bool emitter::IsEvexEncodableInstruction(instruction ins) const
{
return false;
}
return HasEvexEncoding(ins);

switch (ins)
{
case INS_pclmulqdq:
{
return emitComp->compOpportunisticallyDependsOn(InstructionSet_PCLMULQDQ_V256);
}

default:
{
return HasEvexEncoding(ins);
}
}
}

//------------------------------------------------------------------------
Expand Down
27 changes: 23 additions & 4 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20580,19 +20580,38 @@ bool GenTree::isRMWHWIntrinsic(Compiler* comp)
// EVEX form for its intended lowering instruction.
//
// Return Value:
// true if the intrisic node lowering instruction has an EVEX form
// true if the intrinsic node lowering instruction has an EVEX form
//
bool GenTree::isEvexCompatibleHWIntrinsic() const
bool GenTree::isEvexCompatibleHWIntrinsic(Compiler* comp) const
{
return OperIsHWIntrinsic() && HWIntrinsicInfo::HasEvexSemantics(AsHWIntrinsic()->GetHWIntrinsicId());
#if defined(TARGET_XARCH)
if (OperIsHWIntrinsic())
{
NamedIntrinsic intrinsicId = AsHWIntrinsic()->GetHWIntrinsicId();

switch (intrinsicId)
{
case NI_PCLMULQDQ_CarrylessMultiply:
{
return comp->compOpportunisticallyDependsOn(InstructionSet_PCLMULQDQ_V256);
}

default:
{
return HWIntrinsicInfo::HasEvexSemantics(intrinsicId);
}
}
}
#endif
return false;
}

//------------------------------------------------------------------------
// isEmbeddedMaskingCompatibleHWIntrinsic : Checks if the intrinsic is compatible
// with the EVEX embedded masking form for its intended lowering instruction.
//
// Return Value:
// true if the intrisic node lowering instruction has an EVEX embedded masking
// true if the intrinsic node lowering instruction has an EVEX embedded masking
//
bool GenTree::isEmbeddedMaskingCompatibleHWIntrinsic() const
{
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -1480,7 +1480,7 @@ struct GenTree
bool isCommutativeHWIntrinsic() const;
bool isContainableHWIntrinsic() const;
bool isRMWHWIntrinsic(Compiler* comp);
bool isEvexCompatibleHWIntrinsic() const;
bool isEvexCompatibleHWIntrinsic(Compiler* comp) const;
bool isEmbeddedMaskingCompatibleHWIntrinsic() const;
#else
bool isCommutativeHWIntrinsic() const
Expand Down
4 changes: 2 additions & 2 deletions src/coreclr/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -781,6 +781,8 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = {
{ FIRST_NI_FMA, LAST_NI_FMA },
{ FIRST_NI_LZCNT, LAST_NI_LZCNT },
{ FIRST_NI_PCLMULQDQ, LAST_NI_PCLMULQDQ },
{ FIRST_NI_PCLMULQDQ_V256, LAST_NI_PCLMULQDQ_V256 },
{ FIRST_NI_PCLMULQDQ_V512, LAST_NI_PCLMULQDQ_V512 },
{ FIRST_NI_POPCNT, LAST_NI_POPCNT },
{ FIRST_NI_Vector128, LAST_NI_Vector128 },
{ FIRST_NI_Vector256, LAST_NI_Vector256 },
Expand Down Expand Up @@ -822,9 +824,7 @@ static const HWIntrinsicIsaRange hwintrinsicIsaRangeArray[] = {
{ NI_Illegal, NI_Illegal }, // PCLMULQDQ_X64
{ FIRST_NI_POPCNT_X64, LAST_NI_POPCNT_X64 },
{ NI_Illegal, NI_Illegal }, // AVXVNNI_X64
{ NI_Illegal, NI_Illegal }, // MOVBE_X64
{ NI_Illegal, NI_Illegal }, // X86Serialize_X64
{ NI_Illegal, NI_Illegal }, // EVEX_X64
{ FIRST_NI_AVX512F_X64, LAST_NI_AVX512F_X64 },
{ NI_Illegal, NI_Illegal }, // AVX512BW_X64
{ NI_Illegal, NI_Illegal }, // AVX512CD_X64
Expand Down
1 change: 0 additions & 1 deletion src/coreclr/jit/hwintrinsiccodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -782,7 +782,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
case InstructionSet_AVX10v1_V512:
case InstructionSet_AVX10v1_V512_X64:
case InstructionSet_EVEX:
case InstructionSet_EVEX_X64:
{
genAvxFamilyIntrinsic(node, instOptions);
break;
Expand Down
20 changes: 19 additions & 1 deletion src/coreclr/jit/hwintrinsiclistxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -1524,9 +1524,27 @@ HARDWARE_INTRINSIC(LZCNT_X64, LeadingZeroCount,
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// PCLMULQDQ Intrinsics
#define FIRST_NI_PCLMULQDQ NI_PCLMULQDQ_CarrylessMultiply
HARDWARE_INTRINSIC(PCLMULQDQ, CarrylessMultiply, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pclmulqdq, INS_pclmulqdq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics)
HARDWARE_INTRINSIC(PCLMULQDQ, CarrylessMultiply, 16, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pclmulqdq, INS_pclmulqdq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
#define LAST_NI_PCLMULQDQ NI_PCLMULQDQ_CarrylessMultiply

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// PCLMULQDQ_V256 Intrinsics
#define FIRST_NI_PCLMULQDQ_V256 NI_PCLMULQDQ_V256_CarrylessMultiply
HARDWARE_INTRINSIC(PCLMULQDQ_V256, CarrylessMultiply, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pclmulqdq, INS_pclmulqdq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
#define LAST_NI_PCLMULQDQ_V256 NI_PCLMULQDQ_V256_CarrylessMultiply

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// PCLMULQDQ_V512 Intrinsics
#define FIRST_NI_PCLMULQDQ_V512 NI_PCLMULQDQ_V512_CarrylessMultiply
HARDWARE_INTRINSIC(PCLMULQDQ_V512, CarrylessMultiply, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pclmulqdq, INS_pclmulqdq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
#define LAST_NI_PCLMULQDQ_V512 NI_PCLMULQDQ_V512_CarrylessMultiply

// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
Expand Down
35 changes: 30 additions & 5 deletions src/coreclr/jit/hwintrinsicxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,6 @@ static CORINFO_InstructionSet X64VersionOfIsa(CORINFO_InstructionSet isa)
return InstructionSet_POPCNT_X64;
case InstructionSet_X86Serialize:
return InstructionSet_X86Serialize_X64;
case InstructionSet_EVEX:
return InstructionSet_EVEX_X64;
default:
return InstructionSet_NONE;
}
Expand Down Expand Up @@ -103,13 +101,32 @@ static CORINFO_InstructionSet VLVersionOfIsa(CORINFO_InstructionSet isa)
}

//------------------------------------------------------------------------
// V512VersionOfIsa: Gets the corresponding AVX10V512 only InstructionSet for a given InstructionSet
// V256VersionOfIsa: Gets the corresponding V256 only InstructionSet for a given InstructionSet
//
// Arguments:
// isa -- The InstructionSet ID
//
// Return Value:
// The AVX10V512 only InstructionSet associated with isa
// The V256 only InstructionSet associated with isa
static CORINFO_InstructionSet V256VersionOfIsa(CORINFO_InstructionSet isa)
{
switch (isa)
{
case InstructionSet_PCLMULQDQ:
return InstructionSet_PCLMULQDQ_V256;
default:
return InstructionSet_NONE;
}
}

//------------------------------------------------------------------------
// V512VersionOfIsa: Gets the corresponding V512 only InstructionSet for a given InstructionSet
//
// Arguments:
// isa -- The InstructionSet ID
//
// Return Value:
// The V512 only InstructionSet associated with isa
static CORINFO_InstructionSet V512VersionOfIsa(CORINFO_InstructionSet isa)
{
switch (isa)
Expand All @@ -118,6 +135,8 @@ static CORINFO_InstructionSet V512VersionOfIsa(CORINFO_InstructionSet isa)
return InstructionSet_AVX10v1_V512;
case InstructionSet_AVX10v1_X64:
return InstructionSet_AVX10v1_V512_X64;
case InstructionSet_PCLMULQDQ:
return InstructionSet_PCLMULQDQ_V512;
default:
return InstructionSet_NONE;
}
Expand Down Expand Up @@ -330,7 +349,11 @@ CORINFO_InstructionSet HWIntrinsicInfo::lookupIsa(const char* className,

if (className[0] == 'V')
{
if (strcmp(className, "V512") == 0)
if (strcmp(className, "V256") == 0)
{
return V256VersionOfIsa(enclosingIsa);
}
else if (strcmp(className, "V512") == 0)
{
return V512VersionOfIsa(enclosingIsa);
}
Expand Down Expand Up @@ -847,6 +870,8 @@ bool HWIntrinsicInfo::isFullyImplementedIsa(CORINFO_InstructionSet isa)
case InstructionSet_LZCNT_X64:
case InstructionSet_PCLMULQDQ:
case InstructionSet_PCLMULQDQ_X64:
case InstructionSet_PCLMULQDQ_V256:
case InstructionSet_PCLMULQDQ_V512:
case InstructionSet_POPCNT:
case InstructionSet_POPCNT_X64:
case InstructionSet_SSE:
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/instrsxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -400,7 +400,7 @@ INST3(aesenc, "aesenc", IUM_WR, BAD_CODE, BAD_CODE,
INST3(aesenclast, "aesenclast", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDD), INS_TT_NONE, REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform last round of an AES encryption flow
INST3(aesimc, "aesimc", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xDB), INS_TT_NONE, REX_WIG | Encoding_VEX) // Perform the AES InvMixColumn Transformation
INST3(aeskeygenassist, "aeskeygenassist", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0xDF), INS_TT_NONE, REX_WIG | Encoding_VEX) // AES Round Key Generation Assist
INST3(pclmulqdq, "pclmulqdq" , IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x44), INS_TT_NONE, Input_64Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform a carry-less multiplication of two quadwords
INST3(pclmulqdq, "pclmulqdq", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x44), INS_TT_FULL_MEM, Input_64Bit | REX_WIG | Encoding_VEX | Encoding_EVEX | INS_Flags_IsDstDstSrcAVXInstruction) // Perform a carry-less multiplication of two quadwords

// SSE4.1
INST3(blendpd, "blendpd", IUM_WR, BAD_CODE, BAD_CODE, SSE3A(0x0D), INS_TT_NONE, Input_64Bit | REX_WIG | Encoding_VEX | INS_Flags_IsDstDstSrcAVXInstruction) // Blend Packed Double Precision Floating-Point Values
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/jitconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,7 @@ RELEASE_CONFIG_INTEGER(EnableBMI2, "EnableBMI2",
RELEASE_CONFIG_INTEGER(EnableFMA, "EnableFMA", 1) // Allows FMA+ hardware intrinsics to be disabled
RELEASE_CONFIG_INTEGER(EnableLZCNT, "EnableLZCNT", 1) // Allows LZCNT+ hardware intrinsics to be disabled
RELEASE_CONFIG_INTEGER(EnablePCLMULQDQ, "EnablePCLMULQDQ", 1) // Allows PCLMULQDQ+ hardware intrinsics to be disabled
RELEASE_CONFIG_INTEGER(EnableVPCLMULQDQ, "EnableVPCLMULQDQ", 1) // Allows VPCLMULQDQ+ hardware intrinsics to be disabled
RELEASE_CONFIG_INTEGER(EnablePOPCNT, "EnablePOPCNT", 1) // Allows POPCNT+ hardware intrinsics to be disabled
RELEASE_CONFIG_INTEGER(EnableSSE, "EnableSSE", 1) // Allows SSE+ hardware intrinsics to be disabled
RELEASE_CONFIG_INTEGER(EnableSSE2, "EnableSSE2", 1) // Allows SSE2+ hardware intrinsics to be disabled
Expand Down
Loading

0 comments on commit 08a36ca

Please sign in to comment.