Skip to content

Commit

Permalink
Fix a couple issues with Vector128.Get/WithElement (#52985)
Browse files Browse the repository at this point in the history
* Fix an issue with Vector128.WithElement around unused nodes for pre SSE4.1

* Fixing the expected exception for a structreturn test

* Ensure we check if the baseline SIMD ISAs are supported in morph

* Ensure TYP_SIMD12 LclVar can be cloned in lowering

* Fixing up the non SSE41 path for WithElement

* Applying formatting patch

* Ensure ReplaceWithLclVar lowers the created LclVar and assignment

* Don't check the JitLog for compiled methods when the baseline ISAs aren't supported

* Address PR feedback

* Responding to more PR feedback

* Applying formatting patch

* Fixing the more PR review feedback
  • Loading branch information
tannergooding authored May 25, 2021
1 parent 959c327 commit 5f15498
Show file tree
Hide file tree
Showing 12 changed files with 190 additions and 119 deletions.
17 changes: 17 additions & 0 deletions src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -8228,6 +8228,23 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/

bool IsBaselineSimdIsaSupported()
{
#ifdef FEATURE_SIMD
#if defined(TARGET_XARCH)
CORINFO_InstructionSet minimumIsa = InstructionSet_SSE2;
#elif defined(TARGET_ARM64)
CORINFO_InstructionSet minimumIsa = InstructionSet_AdvSimd;
#else
#error Unsupported platform
#endif // !TARGET_XARCH && !TARGET_ARM64

return compOpportunisticallyDependsOn(minimumIsa) && JitConfig.EnableHWIntrinsic();
#else
return false;
#endif
}

// Get highest available level for SIMD codegen
SIMDLevel getSIMDSupportLevel()
{
Expand Down
8 changes: 7 additions & 1 deletion src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6507,7 +6507,13 @@ GenTree* Compiler::gtNewLclvNode(unsigned lnum, var_types type DEBUGARG(IL_OFFSE
// should be able to remove this exception and handle the assignment mismatch in
// Lowering.
LclVarDsc* varDsc = lvaGetDesc(lnum);
assert((type == varDsc->lvType) ||

bool simd12ToSimd16Widening = false;
#if FEATURE_SIMD
// We can additionally have a SIMD12 that was widened to a SIMD16, generally as part of lowering
simd12ToSimd16Widening = (type == TYP_SIMD16) && (varDsc->lvType == TYP_SIMD12);
#endif
assert((type == varDsc->lvType) || simd12ToSimd16Widening ||
(lvaIsImplicitByRefLocal(lnum) && fgGlobalMorph && (varDsc->lvType == TYP_BYREF)) ||
((varDsc->lvType == TYP_STRUCT) && (genTypeSize(type) == varDsc->lvExactSize)));
}
Expand Down
7 changes: 6 additions & 1 deletion src/coreclr/jit/lir.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -246,10 +246,11 @@ void LIR::Use::ReplaceWith(Compiler* compiler, GenTree* replacement)
// lclNum - The local to use for temporary storage. If BAD_VAR_NUM (the
// default) is provided, this method will create and use a new
// local var.
// assign - On return, if non null, contains the created assignment node
//
// Return Value: The number of the local var used for temporary storage.
//
unsigned LIR::Use::ReplaceWithLclVar(Compiler* compiler, unsigned lclNum)
unsigned LIR::Use::ReplaceWithLclVar(Compiler* compiler, unsigned lclNum, GenTree** assign)
{
assert(IsInitialized());
assert(compiler != nullptr);
Expand Down Expand Up @@ -277,6 +278,10 @@ unsigned LIR::Use::ReplaceWithLclVar(Compiler* compiler, unsigned lclNum)
JITDUMP("ReplaceWithLclVar created store :\n");
DISPNODE(store);

if (assign != nullptr)
{
*assign = store;
}
return lclNum;
}

Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/lir.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ class LIR final
bool IsDummyUse() const;

void ReplaceWith(Compiler* compiler, GenTree* replacement);
unsigned ReplaceWithLclVar(Compiler* compiler, unsigned lclNum = BAD_VAR_NUM);
unsigned ReplaceWithLclVar(Compiler* compiler, unsigned lclNum = BAD_VAR_NUM, GenTree** assign = nullptr);
};

//------------------------------------------------------------------------
Expand Down
11 changes: 10 additions & 1 deletion src/coreclr/jit/lower.h
Original file line number Diff line number Diff line change
Expand Up @@ -217,9 +217,18 @@ class Lowering final : public Phase
GenTree* oldUseNode = use.Def();
if ((oldUseNode->gtOper != GT_LCL_VAR) || (tempNum != BAD_VAR_NUM))
{
use.ReplaceWithLclVar(comp, tempNum);
GenTree* assign;
use.ReplaceWithLclVar(comp, tempNum, &assign);

GenTree* newUseNode = use.Def();
ContainCheckRange(oldUseNode->gtNext, newUseNode);

// We need to lower the LclVar and assignment since there may be certain
// types or scenarios, such as TYP_SIMD12, that need special handling

LowerNode(assign);
LowerNode(newUseNode);

return newUseNode->AsLclVar();
}
return oldUseNode->AsLclVar();
Expand Down
91 changes: 65 additions & 26 deletions src/coreclr/jit/lowerxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3007,15 +3007,16 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)

NamedIntrinsic resIntrinsic = NI_Illegal;

idx = comp->gtNewIconNode(imm8);
BlockRange().InsertBefore(node, idx);

switch (simdBaseType)
{
case TYP_LONG:
case TYP_ULONG:
{
op2 = idx;
idx = comp->gtNewIconNode(imm8);
BlockRange().InsertBefore(node, idx);

op1 = comp->gtNewArgList(op1, op3, idx);
op2 = nullptr;
resIntrinsic = NI_SSE41_X64_Insert;
break;
}
Expand All @@ -3033,7 +3034,7 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)

tmp1 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, op3, NI_Vector128_CreateScalarUnsafe, CORINFO_TYPE_FLOAT,
16);
BlockRange().InsertBefore(idx, tmp1);
BlockRange().InsertBefore(node, tmp1);
LowerNode(tmp1);

if (!comp->compOpportunisticallyDependsOn(InstructionSet_SSE41))
Expand Down Expand Up @@ -3088,26 +3089,36 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)
ssize_t controlBits1;
ssize_t controlBits2;

// The comments beside the control bits below are listed using the managed API operands
//
// In practice, for the first step the value being inserted (op3) is in tmp1
// while the other elements of the result (op1) are in tmp2. The result ends
// up containing the value being inserted and its immediate neighbor.
//
// The second step takes that result (which is in op1) plus the other elements
// from op2 (a clone of op1/tmp2 from the previous step) and combines them to
// create the final result.

switch (imm8)
{
case 1:
{
controlBits1 = 0;
controlBits2 = 226;
controlBits1 = 0; // 00 00 00 00; op1 = { X = op3, Y = op3, Z = op1.X, W = op1.X }
controlBits2 = 226; // 11 10 00 10; node = { X = op1.X, Y = op3, Z = op1.Z, W = op1.W }
break;
}

case 2:
{
controlBits1 = 48;
controlBits2 = 132;
controlBits1 = 15; // 00 00 11 11; op1 = { X = op1.W, Y = op1.W, Z = op3, W = op3 }
controlBits2 = 36; // 00 10 01 00; node = { X = op1.X, Y = op1.Y, Z = op3, W = op1.W }
break;
}

case 3:
{
controlBits1 = 32;
controlBits2 = 36;
controlBits1 = 10; // 00 00 10 10; op1 = { X = op1.Z, Y = op1.Z, Z = op3, W = op3 }
controlBits2 = 132; // 10 00 01 00; node = { X = op1.X, Y = op1.Y, Z = op1.Z, W = op3 }
break;
}

Expand All @@ -3118,19 +3129,24 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)
idx = comp->gtNewIconNode(controlBits1);
BlockRange().InsertAfter(tmp2, idx);

if (imm8 == 1)
if (imm8 != 1)
{
std::swap(tmp1, tmp2);
}

op1 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, tmp2, tmp1, idx, NI_SSE_Shuffle,
op1 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, tmp1, tmp2, idx, NI_SSE_Shuffle,
CORINFO_TYPE_FLOAT, 16);
BlockRange().InsertAfter(idx, op1);
LowerNode(op1);

idx = comp->gtNewIconNode(controlBits2);
BlockRange().InsertAfter(op1, idx);

if (imm8 != 1)
{
std::swap(op1, op2);
}

op1 = comp->gtNewArgList(op1, op2, idx);
op2 = nullptr;
resIntrinsic = NI_SSE_Shuffle;
Expand All @@ -3139,8 +3155,8 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)
}
else
{
op3 = tmp1;
idx->AsIntCon()->SetIconValue(imm8 * 16);
imm8 = imm8 * 16;
op3 = tmp1;
FALLTHROUGH;
}
}
Expand All @@ -3150,6 +3166,9 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)
case TYP_INT:
case TYP_UINT:
{
idx = comp->gtNewIconNode(imm8);
BlockRange().InsertBefore(node, idx);

op1 = comp->gtNewArgList(op1, op3, idx);
op2 = nullptr;
resIntrinsic = NI_SSE41_Insert;
Expand All @@ -3159,6 +3178,9 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)
case TYP_SHORT:
case TYP_USHORT:
{
idx = comp->gtNewIconNode(imm8);
BlockRange().InsertBefore(node, idx);

op1 = comp->gtNewArgList(op1, op3, idx);
op2 = nullptr;
resIntrinsic = NI_SSE2_Insert;
Expand All @@ -3178,7 +3200,7 @@ void Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)

tmp1 = comp->gtNewSimdHWIntrinsicNode(TYP_SIMD16, op3, NI_Vector128_CreateScalarUnsafe, CORINFO_TYPE_DOUBLE,
16);
BlockRange().InsertBefore(idx, tmp1);
BlockRange().InsertBefore(node, tmp1);
LowerNode(tmp1);

op2 = tmp1;
Expand Down Expand Up @@ -5474,8 +5496,11 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, Ge

default:
{
// These intrinsics only expect 16 or 32-byte nodes for containment
assert((genTypeSize(node->TypeGet()) == 16) || (genTypeSize(node->TypeGet()) == 32));
if ((genTypeSize(node->TypeGet()) != 16) && (genTypeSize(node->TypeGet()) != 32))
{
// These intrinsics only expect 16 or 32-byte nodes for containment
break;
}

if (!comp->canUseVexEncoding())
{
Expand Down Expand Up @@ -5535,9 +5560,12 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, Ge
case NI_AVX2_ShuffleHigh:
case NI_AVX2_ShuffleLow:
{
// These intrinsics only expect 16 or 32-byte nodes for containment
assert((genTypeSize(node->TypeGet()) == 16) || (genTypeSize(node->TypeGet()) == 32));
assert(supportsSIMDScalarLoads == false);
if ((genTypeSize(node->TypeGet()) != 16) && (genTypeSize(node->TypeGet()) != 32))
{
// These intrinsics only expect 16 or 32-byte nodes for containment
break;
}
assert(!supportsSIMDScalarLoads);

supportsAlignedSIMDLoads = !comp->canUseVexEncoding() || !comp->opts.MinOpts();
supportsUnalignedSIMDLoads = comp->canUseVexEncoding();
Expand All @@ -5553,7 +5581,12 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, Ge
if (containingNode->GetSimdBaseType() == TYP_FLOAT)
{
assert(containingIntrinsicId == NI_SSE41_Insert);
assert(genTypeSize(node->TypeGet()) == 16);

if (genTypeSize(node->TypeGet()) != 16)
{
// These intrinsics only expect 16-byte nodes for containment
break;
}

// Sse41.Insert(V128<float>, V128<float>, byte) is a bit special
// in that it has different behavior depending on whether the
Expand Down Expand Up @@ -5620,8 +5653,11 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, Ge

case NI_AVX_CompareScalar:
{
// These intrinsics only expect 16 or 32-byte nodes for containment
assert((genTypeSize(node->TypeGet()) == 16) || (genTypeSize(node->TypeGet()) == 32));
if ((genTypeSize(node->TypeGet()) != 16) && (genTypeSize(node->TypeGet()) != 32))
{
// These intrinsics only expect 16 or 32-byte nodes for containment
break;
}

assert(supportsAlignedSIMDLoads == false);
assert(supportsUnalignedSIMDLoads == false);
Expand Down Expand Up @@ -5700,8 +5736,11 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, Ge

default:
{
// These intrinsics only expect 16 or 32-byte nodes for containment
assert((genTypeSize(node->TypeGet()) == 16) || (genTypeSize(node->TypeGet()) == 32));
if ((genTypeSize(node->TypeGet()) != 16) && (genTypeSize(node->TypeGet()) != 32))
{
// These intrinsics only expect 16 or 32-byte nodes for containment
break;
}

supportsSIMDScalarLoads = true;
supportsGeneralLoads = supportsSIMDScalarLoads;
Expand Down
12 changes: 8 additions & 4 deletions src/coreclr/jit/morph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6009,11 +6009,14 @@ GenTree* Compiler::fgMorphField(GenTree* tree, MorphAddrContext* mac)
// if this field belongs to simd struct, translate it to simd intrinsic.
if (mac == nullptr)
{
GenTree* newTree = fgMorphFieldToSimdGetElement(tree);
if (newTree != tree)
if (IsBaselineSimdIsaSupported())
{
newTree = fgMorphSmpOp(newTree);
return newTree;
GenTree* newTree = fgMorphFieldToSimdGetElement(tree);
if (newTree != tree)
{
newTree = fgMorphSmpOp(newTree);
return newTree;
}
}
}
else if ((objRef != nullptr) && (objRef->OperGet() == GT_ADDR) && varTypeIsSIMD(objRef->gtGetOp1()))
Expand Down Expand Up @@ -12238,6 +12241,7 @@ GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac)
op2 = tree->AsOp()->gtOp2;

#ifdef FEATURE_SIMD
if (IsBaselineSimdIsaSupported())
{
// We should check whether op2 should be assigned to a SIMD field or not.
// If it is, we should tranlate the tree to simd intrinsic.
Expand Down
10 changes: 1 addition & 9 deletions src/coreclr/jit/simd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1901,15 +1901,7 @@ GenTree* Compiler::impSIMDIntrinsic(OPCODE opcode,
return nullptr;
}

#if defined(TARGET_XARCH)
CORINFO_InstructionSet minimumIsa = InstructionSet_SSE2;
#elif defined(TARGET_ARM64)
CORINFO_InstructionSet minimumIsa = InstructionSet_AdvSimd;
#else
#error Unsupported platform
#endif // !TARGET_XARCH && !TARGET_ARM64

if (!compOpportunisticallyDependsOn(minimumIsa) || !JitConfig.EnableHWIntrinsic())
if (!IsBaselineSimdIsaSupported())
{
// The user disabled support for the baseline ISA so
// don't emit any SIMD intrinsics as they all require
Expand Down
10 changes: 1 addition & 9 deletions src/coreclr/jit/simdashwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -176,15 +176,7 @@ GenTree* Compiler::impSimdAsHWIntrinsic(NamedIntrinsic intrinsic,
return nullptr;
}

#if defined(TARGET_XARCH)
CORINFO_InstructionSet minimumIsa = InstructionSet_SSE2;
#elif defined(TARGET_ARM64)
CORINFO_InstructionSet minimumIsa = InstructionSet_AdvSimd;
#else
#error Unsupported platform
#endif // !TARGET_XARCH && !TARGET_ARM64

if (!compOpportunisticallyDependsOn(minimumIsa) || !JitConfig.EnableHWIntrinsic())
if (!IsBaselineSimdIsaSupported())
{
// The user disabled support for the baseline ISA so
// don't emit any SIMD intrinsics as they all require
Expand Down
2 changes: 1 addition & 1 deletion src/tests/JIT/Directed/StructABI/structreturn.cs
Original file line number Diff line number Diff line change
Expand Up @@ -1332,7 +1332,7 @@ private static void TestReturnViaThrowing<T>() where T : struct
T value = vector[Vector<T>.Count];
System.Diagnostics.Debug.Assert(false);
}
catch (IndexOutOfRangeException)
catch (ArgumentOutOfRangeException)
{
return;
}
Expand Down
Loading

0 comments on commit 5f15498

Please sign in to comment.