Skip to content
This repository has been archived by the owner on Jan 23, 2023. It is now read-only.

Commit

Permalink
Optimize AVX Insert/Extract intrinsics
Browse files Browse the repository at this point in the history
  • Loading branch information
FeiPengIntel committed Mar 19, 2018
1 parent 2205498 commit a7a6953
Show file tree
Hide file tree
Showing 2 changed files with 166 additions and 153 deletions.
77 changes: 77 additions & 0 deletions src/jit/hwintrinsicxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1071,6 +1071,83 @@ GenTree* Compiler::impAvxOrAvx2Intrinsic(NamedIntrinsic intrinsic,

switch (intrinsic)
{
case NI_AVX_Extract:
{
// Imports Avx.Extract with a constant index as two nodes: first pick the 128-bit half of the
// 256-bit vector that holds the requested element, then extract the element from that half
// with the SSE2/SSE4.1 Extract intrinsic.
//
// Avx.Extract executes the software implementation when the imm8 argument is not a compile-time constant
assert(!mustExpand);

// Operands are popped in reverse order: the index first, then the 256-bit vector.
GenTree* lastOp = impPopStack().val;
GenTree* vectorOp = impSIMDPopStack(TYP_SIMD32);
assert(lastOp->IsCnsIntOrI());
int ival = (int)lastOp->AsIntCon()->IconValue();
// The element type is taken from the SIMD class of the argument at sig->args.
baseType = getBaseTypeOfSIMDType(info.compCompHnd->getArgClass(sig, sig->args));
var_types retType = JITtype2varType(sig->retType);
assert(varTypeIsArithmetic(baseType));

// Wrap the index into [0, elementCount) where elementCount = 32 / sizeof(element).
ival = ival & (32 / genTypeSize(baseType) - 1); // clear the unused bits
// Number of elements in one 16-byte half; indices >= halfIndex live in the upper half.
int halfIndex = 16 / genTypeSize(baseType);
// Short element types use the SSE2 Extract intrinsic; every other element type uses SSE4.1 Extract.
NamedIntrinsic extractIntrinsic = varTypeIsShort(baseType) ? NI_SSE2_Extract : NI_SSE41_Extract;
GenTree* half = nullptr;

if (ival >= halfIndex)
{
// Upper half: ExtractVector128 with immediate 1, then rebase the index into that half.
half = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, gtNewIconNode(1), NI_AVX_ExtractVector128,
baseType, 32);
ival -= halfIndex;
}
else
{
// Lower half: GetLowerHalf yields the 128-bit half; the index is already relative to it.
half = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, NI_AVX_GetLowerHalf, baseType, 32);
}

// Extract the element from the selected 16-byte half using the half-relative index.
retNode = gtNewSimdHWIntrinsicNode(retType, half, gtNewIconNode(ival), extractIntrinsic, baseType, 16);
break;
}

case NI_AVX_Insert:
{
// Imports Avx.Insert with a constant index: take the 128-bit half of the 256-bit vector that
// holds the target element, insert the data into that half with the SSE2/SSE4.1 Insert
// intrinsic, then merge the modified half back into a copy of the original vector.
//
// Avx.Insert executes the software implementation when the imm8 argument is not a compile-time constant
assert(!mustExpand);

// Operands are popped in reverse order: the index, the data to insert, then the 256-bit vector.
GenTree* lastOp = impPopStack().val;
GenTree* dataOp = impPopStack().val;
GenTree* vectorOp = impSIMDPopStack(TYP_SIMD32);
assert(lastOp->IsCnsIntOrI());
int ival = (int)lastOp->AsIntCon()->IconValue();
// The element type is taken from the SIMD class of the return type.
baseType = getBaseTypeOfSIMDType(sig->retTypeSigClass);
assert(varTypeIsArithmetic(baseType));

// Wrap the index into [0, elementCount) where elementCount = 32 / sizeof(element).
ival = ival & (32 / genTypeSize(baseType) - 1); // clear the unused bits
// Number of elements in one 16-byte half; indices >= halfIndex live in the upper half.
int halfIndex = 16 / genTypeSize(baseType);
// Short element types use the SSE2 Insert intrinsic; every other element type uses SSE4.1 Insert.
NamedIntrinsic insertIntrinsic = varTypeIsShort(baseType) ? NI_SSE2_Insert : NI_SSE41_Insert;

// The vector is consumed twice below (once to obtain the half, once as the merge target),
// so it is cloned here; vectorOp and clonedVectorOp are the two uses.
GenTree* clonedVectorOp;
vectorOp =
impCloneExpr(vectorOp, &clonedVectorOp, info.compCompHnd->getArgClass(sig, sig->args),
(unsigned)CHECK_SPILL_ALL, nullptr DEBUGARG("AVX Insert clones the vector operand"));

if (ival >= halfIndex)
{
// Upper half: extract it (immediate 1), insert the data at the half-relative index,
// then write the modified half back with InsertVector128 (immediate 1).
GenTree* halfVector = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, gtNewIconNode(1),
NI_AVX_ExtractVector128, baseType, 32);
GenTree* ModifiedHalfVector =
gtNewSimdHWIntrinsicNode(TYP_SIMD16, halfVector, dataOp, gtNewIconNode(ival - halfIndex),
insertIntrinsic, baseType, 16);
retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD32, clonedVectorOp, ModifiedHalfVector, gtNewIconNode(1),
NI_AVX_InsertVector128, baseType, 32);
}
else
{
// Lower half: take it with GetLowerHalf, insert the data at ival, then merge with Blend.
GenTree* halfVector = gtNewSimdHWIntrinsicNode(TYP_SIMD16, vectorOp, NI_AVX_GetLowerHalf, baseType, 32);
// NOTE(review): this node is typed TYP_SIMD32 although its simdSize is 16, unlike the
// upper-half path which uses TYP_SIMD16 - presumably so it can feed the 32-byte Blend
// below directly; confirm this is intentional.
GenTree* ModifiedHalfVector =
gtNewSimdHWIntrinsicNode(TYP_SIMD32, halfVector, dataOp, gtNewIconNode(ival), insertIntrinsic,
baseType, 16);
// NOTE(review): Blend is issued with TYP_FLOAT and immediate 15 regardless of baseType -
// presumably 0b1111 selects the four low floats (the low 128 bits) from
// ModifiedHalfVector and the rest from clonedVectorOp; verify against the Blend codegen.
retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD32, clonedVectorOp, ModifiedHalfVector, gtNewIconNode(15),
NI_AVX_Blend, TYP_FLOAT, 32);
}
break;
}

case NI_AVX_ExtractVector128:
case NI_AVX2_ExtractVector128:
{
Expand Down
Loading

0 comments on commit a7a6953

Please sign in to comment.