Skip to content
This repository has been archived by the owner on Jan 23, 2023. It is now read-only.

Updating the JIT to handle the FMA hardware intrinsics #18105

Merged
merged 3 commits into from
May 25, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/jit/codegenlinear.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ void genHWIntrinsic(GenTreeHWIntrinsic* node);
#if defined(_TARGET_XARCH_)
void genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins);
void genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins);
void genHWIntrinsic_R_R_R_RM(
instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, GenTree* op3);
void genSSEIntrinsic(GenTreeHWIntrinsic* node);
void genSSE2Intrinsic(GenTreeHWIntrinsic* node);
void genSSE41Intrinsic(GenTreeHWIntrinsic* node);
Expand Down
66 changes: 35 additions & 31 deletions src/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2421,18 +2421,17 @@ static bool configEnableISA(InstructionSet isa)
return JitConfig.EnableSSE42() != 0;
case InstructionSet_AVX:
return JitConfig.EnableAVX() != 0;
case InstructionSet_FMA:
return JitConfig.EnableFMA() != 0;
case InstructionSet_AVX2:
// Don't enable AVX2 when AVX is disabled
return (JitConfig.EnableAVX() != 0) && (JitConfig.EnableAVX2() != 0);
return JitConfig.EnableAVX2() != 0;

case InstructionSet_AES:
return JitConfig.EnableAES() != 0;
case InstructionSet_BMI1:
return JitConfig.EnableBMI1() != 0;
case InstructionSet_BMI2:
return JitConfig.EnableBMI2() != 0;
case InstructionSet_FMA:
return JitConfig.EnableFMA() != 0;
case InstructionSet_LZCNT:
return JitConfig.EnableLZCNT() != 0;
case InstructionSet_PCLMULQDQ:
Expand All @@ -2443,8 +2442,8 @@ static bool configEnableISA(InstructionSet isa)
return false;
}
#else
// We have a retail config switch that can disable AVX/AVX2 instructions
if ((isa == InstructionSet_AVX) || (isa == InstructionSet_AVX2))
// We have a retail config switch that can disable AVX/FMA/AVX2 instructions
if ((isa == InstructionSet_AVX) || (isa == InstructionSet_FMA) || (isa == InstructionSet_AVX2))
{
return JitConfig.EnableAVX() != 0;
}
Expand Down Expand Up @@ -2513,22 +2512,6 @@ void Compiler::compSetProcessor()
opts.setSupportedISA(InstructionSet_AES);
}
}
if (jitFlags.IsSet(JitFlags::JIT_FLAG_USE_AVX))
{
if (configEnableISA(InstructionSet_AVX))
{
opts.setSupportedISA(InstructionSet_AVX);
}
}
if (jitFlags.IsSet(JitFlags::JIT_FLAG_USE_AVX2))
{
// COMPlus_EnableAVX is also used to control the code generation of
// System.Numerics.Vectors and floating-point arithmetics
if (configEnableISA(InstructionSet_AVX) && configEnableISA(InstructionSet_AVX2))
{
opts.setSupportedISA(InstructionSet_AVX2);
}
}
if (jitFlags.IsSet(JitFlags::JIT_FLAG_USE_BMI1))
{
if (configEnableISA(InstructionSet_BMI1))
Expand All @@ -2543,13 +2526,6 @@ void Compiler::compSetProcessor()
opts.setSupportedISA(InstructionSet_BMI2);
}
}
if (jitFlags.IsSet(JitFlags::JIT_FLAG_USE_FMA))
{
if (configEnableISA(InstructionSet_FMA))
{
opts.setSupportedISA(InstructionSet_FMA);
}
}
if (jitFlags.IsSet(JitFlags::JIT_FLAG_USE_LZCNT))
{
if (configEnableISA(InstructionSet_LZCNT))
Expand All @@ -2572,8 +2548,8 @@ void Compiler::compSetProcessor()
}
}

// There are currently two sets of flags that control SSE3 through SSE4.2 support
// This is the general EnableSSE3_4 flag and the individual ISA flags. We need to
// There are currently two sets of flags that control SSE3 through SSE4.2 support:
// These are the general EnableSSE3_4 flag and the individual ISA flags. We need to
// check both for any given ISA.
if (JitConfig.EnableSSE3_4())
{
Expand Down Expand Up @@ -2606,6 +2582,34 @@ void Compiler::compSetProcessor()
}
}
}

// There are currently two sets of flags that control AVX, FMA, and AVX2 support:
// These are the general EnableAVX flag and the individual ISA flags. We need to
// check both for any given ISA.
if (JitConfig.EnableAVX())
{
if (jitFlags.IsSet(JitFlags::JIT_FLAG_USE_AVX))
{
if (configEnableISA(InstructionSet_AVX))
{
opts.setSupportedISA(InstructionSet_AVX);
}
}
if (jitFlags.IsSet(JitFlags::JIT_FLAG_USE_FMA))
{
if (configEnableISA(InstructionSet_FMA))
{
opts.setSupportedISA(InstructionSet_FMA);
}
}
if (jitFlags.IsSet(JitFlags::JIT_FLAG_USE_AVX2))
{
if (configEnableISA(InstructionSet_AVX2))
{
opts.setSupportedISA(InstructionSet_AVX2);
}
}
}
}

if (!compIsForInlining())
Expand Down
189 changes: 187 additions & 2 deletions src/jit/emitxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@ bool IsAVXOnlyInstruction(instruction ins)
return (ins >= INS_FIRST_AVX_INSTRUCTION) && (ins <= INS_LAST_AVX_INSTRUCTION);
}

//------------------------------------------------------------------------
// IsFMAInstruction: Check whether an instruction lies in the FMA range.
//
// Arguments:
//    ins - the instruction to classify
//
// Return Value:
//    true when ins is between INS_FIRST_FMA_INSTRUCTION and
//    INS_LAST_FMA_INSTRUCTION (both bounds inclusive); false otherwise.
//
bool IsFMAInstruction(instruction ins)
{
    // Anything below the first FMA marker cannot be an FMA instruction.
    if (ins < INS_FIRST_FMA_INSTRUCTION)
    {
        return false;
    }

    return ins <= INS_LAST_FMA_INSTRUCTION;
}

bool emitter::IsAVXInstruction(instruction ins)
{
return UseVEXEncoding() && IsSSEOrAVXInstruction(ins);
Expand Down Expand Up @@ -206,6 +211,66 @@ bool emitter::IsDstDstSrcAVXInstruction(instruction ins)
case INS_unpcklps:
case INS_unpckhpd:
case INS_unpcklpd:
case INS_vfmadd132pd:
case INS_vfmadd213pd:
case INS_vfmadd231pd:
case INS_vfmadd132ps:
case INS_vfmadd213ps:
case INS_vfmadd231ps:
case INS_vfmadd132sd:
case INS_vfmadd213sd:
case INS_vfmadd231sd:
case INS_vfmadd132ss:
case INS_vfmadd213ss:
case INS_vfmadd231ss:
case INS_vfmaddsub132pd:
case INS_vfmaddsub213pd:
case INS_vfmaddsub231pd:
case INS_vfmaddsub132ps:
case INS_vfmaddsub213ps:
case INS_vfmaddsub231ps:
case INS_vfmsubadd132pd:
case INS_vfmsubadd213pd:
case INS_vfmsubadd231pd:
case INS_vfmsubadd132ps:
case INS_vfmsubadd213ps:
case INS_vfmsubadd231ps:
case INS_vfmsub132pd:
case INS_vfmsub213pd:
case INS_vfmsub231pd:
case INS_vfmsub132ps:
case INS_vfmsub213ps:
case INS_vfmsub231ps:
case INS_vfmsub132sd:
case INS_vfmsub213sd:
case INS_vfmsub231sd:
case INS_vfmsub132ss:
case INS_vfmsub213ss:
case INS_vfmsub231ss:
case INS_vfnmadd132pd:
case INS_vfnmadd213pd:
case INS_vfnmadd231pd:
case INS_vfnmadd132ps:
case INS_vfnmadd213ps:
case INS_vfnmadd231ps:
case INS_vfnmadd132sd:
case INS_vfnmadd213sd:
case INS_vfnmadd231sd:
case INS_vfnmadd132ss:
case INS_vfnmadd213ss:
case INS_vfnmadd231ss:
case INS_vfnmsub132pd:
case INS_vfnmsub213pd:
case INS_vfnmsub231pd:
case INS_vfnmsub132ps:
case INS_vfnmsub213ps:
case INS_vfnmsub231ps:
case INS_vfnmsub132sd:
case INS_vfnmsub213sd:
case INS_vfnmsub231sd:
case INS_vfnmsub132ss:
case INS_vfnmsub213ss:
case INS_vfnmsub231ss:
case INS_vinsertf128:
case INS_vinserti128:
case INS_vmaskmovps:
Expand Down Expand Up @@ -368,6 +433,36 @@ bool TakesRexWPrefix(instruction ins, emitAttr attr)
case INS_vpsllvq:
case INS_pinsrq:
case INS_pextrq:
case INS_vfmadd132pd:
case INS_vfmadd213pd:
case INS_vfmadd231pd:
case INS_vfmadd132sd:
case INS_vfmadd213sd:
case INS_vfmadd231sd:
case INS_vfmaddsub132pd:
case INS_vfmaddsub213pd:
case INS_vfmaddsub231pd:
case INS_vfmsubadd132pd:
case INS_vfmsubadd213pd:
case INS_vfmsubadd231pd:
case INS_vfmsub132pd:
case INS_vfmsub213pd:
case INS_vfmsub231pd:
case INS_vfmsub132sd:
case INS_vfmsub213sd:
case INS_vfmsub231sd:
case INS_vfnmadd132pd:
case INS_vfnmadd213pd:
case INS_vfnmadd231pd:
case INS_vfnmadd132sd:
case INS_vfnmadd213sd:
case INS_vfnmadd231sd:
case INS_vfnmsub132pd:
case INS_vfnmsub213pd:
case INS_vfnmsub231pd:
case INS_vfnmsub132sd:
case INS_vfnmsub213sd:
case INS_vfnmsub231sd:
return true;
default:
break;
Expand Down Expand Up @@ -5360,12 +5455,85 @@ void emitter::emitIns_SIMD_R_R_I(instruction ins, emitAttr attr, regNumber reg,
}
}

void emitter::emitIns_SIMD_R_R_R_A(
instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber reg2, GenTreeIndir* indir)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In this (and subsequent), I would name the first reg argument targetReg or dstReg or something. Unlike some of the other methods, this one is designed only to support the case where the first argument is the dest, so it would be good to be descriptive.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can I submit a separate PR fixing up all the register names here? Currently the majority are regNumber reg where it should be regNumber targetReg and it would be nice to fix them all up (I could also do it in this PR, if you think that is fine).

{
assert(IsFMAInstruction(ins));
assert(UseVEXEncoding());

if (reg != reg1)
{
// Ensure we aren't overwriting op2
assert(reg != reg2);

emitIns_R_R(INS_movaps, attr, reg, reg1);
}

emitIns_R_R_A(ins, attr, reg, reg2, indir, IF_RWR_RRD_ARD);
}

//------------------------------------------------------------------------
// emitIns_SIMD_R_R_R_AR: Emit an FMA instruction whose last operand is a
//                        memory location addressed through a register.
//
// Arguments:
//    ins  - the instruction to emit; asserted to be an FMA instruction
//    attr - emit attribute used for both the optional copy and ins
//    reg  - target register; it is made to hold reg1's value before ins is emitted
//    reg1 - register holding the first operand
//    reg2 - register holding the second operand
//    base - register forwarded to emitIns_R_R_AR as the address base
//
// Notes:
//    Requires VEX encoding (asserted). The trailing 0 passed to
//    emitIns_R_R_AR is presumably the displacement from base -- confirm
//    against its declaration.
//
void emitter::emitIns_SIMD_R_R_R_AR(
    instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber reg2, regNumber base)
{
    assert(IsFMAInstruction(ins));
    assert(UseVEXEncoding());

    const bool op1NeedsCopy = (reg1 != reg);

    if (op1NeedsCopy)
    {
        // The copy clobbers the target, so op2 must not be living in it.
        assert(reg != reg2);

        emitIns_R_R(INS_movaps, attr, reg, reg1);
    }

    // At this point the target holds op1; emit the instruction itself.
    emitIns_R_R_AR(ins, attr, reg, reg2, base, 0);
}

//------------------------------------------------------------------------
// emitIns_SIMD_R_R_R_C: Emit an FMA instruction whose last operand is
//                       identified by a field handle and offset.
//
// Arguments:
//    ins    - the instruction to emit; asserted to be an FMA instruction
//    attr   - emit attribute used for both the optional copy and ins
//    reg    - target register; it is made to hold reg1's value before ins is emitted
//    reg1   - register holding the first operand
//    reg2   - register holding the second operand
//    fldHnd - field handle forwarded unchanged to emitIns_R_R_C
//    offs   - offset forwarded unchanged to emitIns_R_R_C
//
// Notes:
//    Requires VEX encoding (asserted).
//
void emitter::emitIns_SIMD_R_R_R_C(
    instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber reg2, CORINFO_FIELD_HANDLE fldHnd, int offs)
{
    assert(IsFMAInstruction(ins));
    assert(UseVEXEncoding());

    const bool op1NeedsCopy = (reg1 != reg);

    if (op1NeedsCopy)
    {
        // The copy clobbers the target, so op2 must not be living in it.
        assert(reg != reg2);

        emitIns_R_R(INS_movaps, attr, reg, reg1);
    }

    // At this point the target holds op1; emit the instruction itself.
    emitIns_R_R_C(ins, attr, reg, reg2, fldHnd, offs);
}

void emitter::emitIns_SIMD_R_R_R_R(
instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber reg2, regNumber reg3)
{
assert(isAvxBlendv(ins) || isSse41Blendv(ins));
if (UseVEXEncoding())
if (IsFMAInstruction(ins))
{
assert(UseVEXEncoding());

if (reg != reg1)
{
// Ensure we aren't overwriting op2 or op3

assert(reg != reg2);
assert(reg != reg3);

emitIns_R_R(INS_movaps, attr, reg, reg1);
}

emitIns_R_R_R(ins, attr, reg, reg2, reg3);
}
else if (UseVEXEncoding())
{
assert(isAvxBlendv(ins) || isSse41Blendv(ins));

// convert SSE encoding of SSE4.1 instructions to VEX encoding
switch (ins)
{
Expand Down Expand Up @@ -5407,6 +5575,23 @@ void emitter::emitIns_SIMD_R_R_R_R(
}
}

//------------------------------------------------------------------------
// emitIns_SIMD_R_R_R_S: Emit an FMA instruction whose last operand is
//                       described by a (varx, offs) pair.
//
// Arguments:
//    ins  - the instruction to emit; asserted to be an FMA instruction
//    attr - emit attribute used for both the optional copy and ins
//    reg  - target register; it is made to hold reg1's value before ins is emitted
//    reg1 - register holding the first operand
//    reg2 - register holding the second operand
//    varx - forwarded unchanged to emitIns_R_R_S (presumably the stack
//           variable number -- confirm against its declaration)
//    offs - forwarded unchanged to emitIns_R_R_S (presumably the offset
//           within that variable -- confirm)
//
// Notes:
//    Requires VEX encoding (asserted).
//
void emitter::emitIns_SIMD_R_R_R_S(
    instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber reg2, int varx, int offs)
{
    assert(IsFMAInstruction(ins));
    assert(UseVEXEncoding());

    const bool op1NeedsCopy = (reg1 != reg);

    if (op1NeedsCopy)
    {
        // The copy clobbers the target, so op2 must not be living in it.
        assert(reg != reg2);

        emitIns_R_R(INS_movaps, attr, reg, reg1);
    }

    // At this point the target holds op1; emit the instruction itself.
    emitIns_R_R_S(ins, attr, reg, reg2, varx, offs);
}

void emitter::emitIns_SIMD_R_R_S(instruction ins, emitAttr attr, regNumber reg, regNumber reg1, int varx, int offs)
{
if (UseVEXEncoding())
Expand Down
13 changes: 13 additions & 0 deletions src/jit/emitxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -422,8 +422,21 @@ void emitIns_SIMD_R_R_C(
void emitIns_SIMD_R_R_S(instruction ins, emitAttr attr, regNumber reg, regNumber reg1, int varx, int offs);
void emitIns_SIMD_R_R_R(instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber reg2);
void emitIns_SIMD_R_R_I(instruction ins, emitAttr attr, regNumber reg, regNumber reg1, int ival);
// Four-operand SIMD emit helpers. In each, `reg` is the target, `reg1` and
// `reg2` are register operands, and the remaining parameter(s) describe the
// final operand. The _A/_AR/_C/_S definitions assert an FMA instruction and
// VEX encoding, copy reg1 into reg with movaps when the two differ (asserting
// reg != reg2 first), and then emit `ins` with reg, reg2 and the final operand.

// Final operand is the memory location described by the `indir` node.
void emitIns_SIMD_R_R_R_A(
instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber reg2, GenTreeIndir* indir);
// Final operand is memory addressed through the `base` register.
void emitIns_SIMD_R_R_R_AR(
instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber reg2, regNumber base);
// Final operand is identified by a field handle plus offset.
void emitIns_SIMD_R_R_R_C(instruction ins,
emitAttr attr,
regNumber reg,
regNumber reg1,
regNumber reg2,
CORINFO_FIELD_HANDLE fldHnd,
int offs);
// Final operand is the register `reg3`; the definition handles FMA
// instructions in addition to the blendv instructions.
void emitIns_SIMD_R_R_R_R(
instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber reg2, regNumber reg3);
// Final operand is described by (varx, offs), forwarded to emitIns_R_R_S
// (presumably a stack variable number and offset -- confirm).
void emitIns_SIMD_R_R_R_S(
instruction ins, emitAttr attr, regNumber reg, regNumber reg1, regNumber reg2, int varx, int offs);
#endif // FEATURE_HW_INTRINSICS

enum EmitCallType
Expand Down
12 changes: 12 additions & 0 deletions src/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17421,7 +17421,19 @@ bool GenTree::isRMWHWIntrinsic(Compiler* comp)

switch (AsHWIntrinsic()->gtHWIntrinsicId)
{
// TODO-XArch-Cleanup: Move this switch block to be table driven.

case NI_SSE42_Crc32:
case NI_FMA_MultiplyAdd:
case NI_FMA_MultiplyAddNegated:
case NI_FMA_MultiplyAddNegatedScalar:
case NI_FMA_MultiplyAddScalar:
case NI_FMA_MultiplyAddSubtract:
case NI_FMA_MultiplySubtract:
case NI_FMA_MultiplySubtractAdd:
case NI_FMA_MultiplySubtractNegated:
case NI_FMA_MultiplySubtractNegatedScalar:
case NI_FMA_MultiplySubtractScalar:
return true;

default:
Expand Down
Loading