Skip to content
This repository has been archived by the owner on Jan 23, 2023. It is now read-only.

Commit

Permalink
Implement SetAllVector256
Browse files Browse the repository at this point in the history
  • Loading branch information
FeiPengIntel committed Mar 19, 2018
1 parent 3be51e7 commit 46549e9
Show file tree
Hide file tree
Showing 7 changed files with 406 additions and 0 deletions.
67 changes: 67 additions & 0 deletions src/jit/hwintrinsiccodegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1311,6 +1311,73 @@ void CodeGen::genAvxOrAvx2Intrinsic(GenTreeHWIntrinsic* node)
break;
}

case NI_AVX_SetAllVector256:
{
assert(op1 != nullptr);
assert(op2 == nullptr);
op1Reg = op1->gtRegNum;
if (varTypeIsIntegral(baseType))
{
// If the argument is a integer, it needs to be moved into a XMM register
regNumber tmpXMM = node->ExtractTempReg();
if (varTypeIsLong(baseType))
{
emit->emitIns_R_R(INS_mov_i2xmm, emitTypeSize(baseType), tmpXMM, op1Reg);
}
else
{
emit->emitIns_R_R(INS_mov_i2xmm, emitTypeSize(TYP_SIMD16), tmpXMM, op1Reg);
}
op1Reg = tmpXMM;
}

if (compiler->compSupports(InstructionSet_AVX2))
{
emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD32), targetReg, op1Reg);
}
else
{
// duplicate the scalar argument to XMM register
switch (baseType)
{
case TYP_FLOAT:
emit->emitIns_SIMD_R_R_I(INS_vpermilps, emitTypeSize(TYP_SIMD16), op1Reg, op1Reg, 0);
break;
case TYP_DOUBLE:
emit->emitIns_R_R(INS_movddup, emitTypeSize(TYP_SIMD16), op1Reg, op1Reg);
break;
case TYP_BYTE:
case TYP_UBYTE:
{
regNumber tmpZeroReg = node->GetSingleTempReg();
emit->emitIns_R_R(INS_pxor, emitTypeSize(TYP_SIMD16), tmpZeroReg, tmpZeroReg);
emit->emitIns_SIMD_R_R_R(INS_pshufb, emitTypeSize(TYP_SIMD16), op1Reg, op1Reg, tmpZeroReg);
break;
}
case TYP_SHORT:
case TYP_USHORT:
emit->emitIns_SIMD_R_R_I(INS_pshuflw, emitTypeSize(TYP_SIMD16), op1Reg, op1Reg, 0);
emit->emitIns_SIMD_R_R_I(INS_pshufd, emitTypeSize(TYP_SIMD16), op1Reg, op1Reg, 80);
break;
case TYP_INT:
case TYP_UINT:
emit->emitIns_SIMD_R_R_I(INS_pshufd, emitTypeSize(TYP_SIMD16), op1Reg, op1Reg, 0);
break;
case TYP_LONG:
case TYP_ULONG:
emit->emitIns_SIMD_R_R_I(INS_pshufd, emitTypeSize(TYP_SIMD16), op1Reg, op1Reg, 68);
break;

default:
unreached();
break;
}
// duplicate the XMM register to YMM register
emit->emitIns_SIMD_R_R_R_I(INS_vinsertf128, emitTypeSize(TYP_SIMD32), targetReg, op1Reg, op1Reg, 1);
}
break;
}

case NI_AVX_ExtendToVector256:
{
// ExtendToVector256 has zero-extend semantics in order to ensure it is deterministic
Expand Down
4 changes: 4 additions & 0 deletions src/jit/hwintrinsiclistxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -347,12 +347,14 @@ HARDWARE_INTRINSIC(AVX_Divide, "Divide",
HARDWARE_INTRINSIC(AVX_DotProduct, "DotProduct", AVX, -1, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_dpps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX_DuplicateEvenIndexed, "DuplicateEvenIndexed", AVX, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsldup, INS_movddup}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX_DuplicateOddIndexed, "DuplicateOddIndexed", AVX, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movshdup, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX_Extract, "Extract", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_BaseTypeFromFirstArg|HW_Flag_FullRangeIMM|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(AVX_ExtendToVector256, "ExtendToVector256", AVX, -1, 32, 1, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movaps, INS_movapd}, HW_Category_Helper, HW_Flag_OneTypeGeneric|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX_ExtractVector128, "ExtractVector128", AVX, -1, 32, -1, {INS_vextractf128,INS_vextractf128,INS_vextractf128,INS_vextractf128,INS_vextractf128,INS_vextractf128,INS_vextractf128,INS_vextractf128,INS_vextractf128, INS_vextractf128},HW_Category_IMM, HW_Flag_OneTypeGeneric|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX_Floor, "Floor", AVX, 9, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX_GetLowerHalf, "GetLowerHalf", AVX, -1, 32, 1, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movaps, INS_movapd}, HW_Category_Helper, HW_Flag_OneTypeGeneric|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX_HorizontalAdd, "HorizontalAdd", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_haddps, INS_haddpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX_HorizontalSubtract, "HorizontalSubtract", AVX, -1, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_hsubps, INS_hsubpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX_Insert, "Insert", AVX, -1, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(AVX_InsertVector128, "InsertVector128", AVX, -1, 32, 3, {INS_vinsertf128,INS_vinsertf128,INS_vinsertf128,INS_vinsertf128,INS_vinsertf128,INS_vinsertf128,INS_vinsertf128,INS_vinsertf128,INS_vinsertf128, INS_vinsertf128},HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_OneTypeGeneric|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(AVX_LoadAlignedVector256, "LoadAlignedVector256", AVX, -1, 32, 1, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movaps, INS_movapd}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX_LoadDquVector256, "LoadDquVector256", AVX, -1, 32, 1, {INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_lddqu, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoRMWSemantics)
Expand All @@ -369,6 +371,8 @@ HARDWARE_INTRINSIC(AVX_RoundToNearestInteger, "RoundToNea
HARDWARE_INTRINSIC(AVX_RoundToNegativeInfinity, "RoundToNegativeInfinity", AVX, 9, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX_RoundToPositiveInfinity, "RoundToPositiveInfinity", AVX, 10, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX_RoundToZero, "RoundToZero", AVX, 11, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_roundps, INS_roundpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX_SetVector256, "SetVector256", AVX, -1, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoCodeGen|HW_Flag_SecondArgMaybe64Bit)
HARDWARE_INTRINSIC(AVX_SetAllVector256, "SetAllVector256", AVX, -1, 32, 1, {INS_vpbroadcastb,INS_vpbroadcastb,INS_vpbroadcastw,INS_vpbroadcastw,INS_vpbroadcastd,INS_vpbroadcastd,INS_vpbroadcastq,INS_vpbroadcastq,INS_vbroadcastss,INS_vbroadcastsd},HW_Category_Helper, HW_Flag_MultiIns|HW_Flag_SpecialImport|HW_Flag_OneTypeGeneric)
HARDWARE_INTRINSIC(AVX_SetZeroVector256, "SetZeroVector256", AVX, -1, 32, 0, {INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_xorps, INS_xorpd}, HW_Category_Helper, HW_Flag_OneTypeGeneric|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX_Shuffle, "Shuffle", AVX, -1, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_shufps, INS_shufpd}, HW_Category_IMM, HW_Flag_NoRMWSemantics|HW_Flag_FullRangeIMM)
HARDWARE_INTRINSIC(AVX_Sqrt, "Sqrt", AVX, -1, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtps, INS_sqrtpd}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics)
Expand Down
15 changes: 15 additions & 0 deletions src/jit/hwintrinsicxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1197,6 +1197,21 @@ GenTree* Compiler::impAvxOrAvx2Intrinsic(NamedIntrinsic intrinsic,
break;
}

case NI_AVX_SetAllVector256:
{
GenTree* arg = impPopStack().val;
baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
#ifdef _TARGET_X86_
// TODO-XARCH: support long/ulong on 32-bit platfroms
if (varTypeIsLong(baseType))
{
return impUnsupportedHWIntrinsic(CORINFO_HELP_THROW_PLATFORM_NOT_SUPPORTED, method, sig, mustExpand);
}
#endif
retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD32, arg, NI_AVX_SetAllVector256, baseType, 32);
break;
}

case NI_AVX_ExtractVector128:
case NI_AVX2_ExtractVector128:
{
Expand Down
14 changes: 14 additions & 0 deletions src/jit/lsraxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2368,6 +2368,20 @@ void LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
break;
}

case NI_AVX_SetAllVector256:
{
if (varTypeIsIntegral(baseType))
{
info->internalFloatCount = 1;
if (!compiler->compSupports(InstructionSet_AVX2) && varTypeIsByte(baseType))
{
info->internalFloatCount += 1;
}
info->setInternalCandidates(this, allSIMDRegs());
}
break;
}

case NI_SSE2_MaskMove:
{
// SSE2 MaskMove hardcodes the destination (op3) in DI/EDI/RDI
Expand Down
Loading

0 comments on commit 46549e9

Please sign in to comment.