Skip to content

Commit

Permalink
Arm64/Sve: Implement AbsoluteCompare* and Compare* APIs (dotnet#104464)
Browse files Browse the repository at this point in the history
* Add AbsoluteCompare*() APIs

* Map API to instructions

* Add test coverage

* Add support for AbsoluteCompare

* Uncomment some other tests

* Add CompareGreater* and CompareLess* APIs

* Add remaining Compare* APIs

* Map API to instructions

* Fix test cases

* Add test coverage for Sve.CompareUnordered
  • Loading branch information
kunalspathak authored Jul 5, 2024
1 parent 84bda51 commit 5505150
Show file tree
Hide file tree
Showing 10 changed files with 2,326 additions and 116 deletions.
9 changes: 8 additions & 1 deletion src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -586,7 +586,14 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)

case 2:
{
assert(instrIsRMW);
if (!instrIsRMW)
{
// Perform the actual "predicated" operation so that `embMaskOp1Reg` is the first operand
// and `embMaskOp2Reg` is the second operand.
GetEmitter()->emitIns_R_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp1Reg,
embMaskOp2Reg, opt);
break;
}

insScalableOpts sopt = INS_SCALABLE_OPTS_NONE;
bool hasShift = false;
Expand Down
11 changes: 11 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@
// SVE Intrinsics
#define FIRST_NI_Sve NI_Sve_Abs
HARDWARE_INTRINSIC(Sve, Abs, -1, -1, false, {INS_sve_abs, INS_invalid, INS_sve_abs, INS_invalid, INS_sve_abs, INS_invalid, INS_sve_abs, INS_invalid, INS_sve_fabs, INS_sve_fabs}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
// AbsoluteCompare*: only the last two instruction slots are populated (facgt/facge/faclt/facle);
// the first eight slots are INS_invalid. Each row sets HW_Flag_ReturnsPerElementMask and does not
// set HW_Flag_HasRMWSemantics.
// NOTE(review): the last two slots are presumably the float/double base types - confirm against
// the table's column header, which is not visible here.
HARDWARE_INTRINSIC(Sve, AbsoluteCompareGreaterThan, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_facgt, INS_sve_facgt}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Sve, AbsoluteCompareGreaterThanOrEqual, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_facge, INS_sve_facge}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Sve, AbsoluteCompareLessThan, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_faclt, INS_sve_faclt}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Sve, AbsoluteCompareLessThanOrEqual, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_facle, INS_sve_facle}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Sve, AbsoluteDifference, -1, -1, false, {INS_sve_sabd, INS_sve_uabd, INS_sve_sabd, INS_sve_uabd, INS_sve_sabd, INS_sve_uabd, INS_sve_sabd, INS_sve_uabd, INS_sve_fabd, INS_sve_fabd}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, Add, -1, -1, false, {INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_fadd, INS_sve_fadd}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, AddAcross, -1, 1, true, {INS_sve_saddv, INS_sve_uaddv, INS_sve_saddv, INS_sve_uaddv, INS_sve_saddv, INS_sve_uaddv, INS_sve_uaddv, INS_sve_uaddv, INS_sve_faddv, INS_sve_faddv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
Expand All @@ -26,6 +30,13 @@ HARDWARE_INTRINSIC(Sve, AndAcross,
HARDWARE_INTRINSIC(Sve, BitwiseClear, -1, -1, false, {INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_sve_bic, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, BooleanNot, -1, -1, false, {INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_sve_cnot, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, Compact, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_compact, INS_sve_compact, INS_sve_compact, INS_sve_compact, INS_sve_compact, INS_sve_compact}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
// Compare*: every row sets HW_Flag_ReturnsPerElementMask and none sets HW_Flag_HasRMWSemantics.
//  - CompareEqual / CompareNotEqualTo use a single instruction (cmpeq / cmpne) for the first eight
//    slots and fcmeq / fcmne for the last two.
//  - The ordered comparisons alternate instructions across the first eight slots
//    (cmpgt/cmphi, cmpge/cmphs, cmplt/cmplo, cmple/cmpls).
//    NOTE(review): presumably the alternation is signed vs. unsigned base types - confirm against
//    the table's column header, which is not visible here.
//  - CompareUnordered populates only the last two slots (fcmuo); the rest are INS_invalid.
HARDWARE_INTRINSIC(Sve, CompareEqual, -1, -1, false, {INS_sve_cmpeq, INS_sve_cmpeq, INS_sve_cmpeq, INS_sve_cmpeq, INS_sve_cmpeq, INS_sve_cmpeq, INS_sve_cmpeq, INS_sve_cmpeq, INS_sve_fcmeq, INS_sve_fcmeq}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Sve, CompareGreaterThan, -1, -1, false, {INS_sve_cmpgt, INS_sve_cmphi, INS_sve_cmpgt, INS_sve_cmphi, INS_sve_cmpgt, INS_sve_cmphi, INS_sve_cmpgt, INS_sve_cmphi, INS_sve_fcmgt, INS_sve_fcmgt}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Sve, CompareGreaterThanOrEqual, -1, -1, false, {INS_sve_cmpge, INS_sve_cmphs, INS_sve_cmpge, INS_sve_cmphs, INS_sve_cmpge, INS_sve_cmphs, INS_sve_cmpge, INS_sve_cmphs, INS_sve_fcmge, INS_sve_fcmge}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Sve, CompareLessThan, -1, -1, false, {INS_sve_cmplt, INS_sve_cmplo, INS_sve_cmplt, INS_sve_cmplo, INS_sve_cmplt, INS_sve_cmplo, INS_sve_cmplt, INS_sve_cmplo, INS_sve_fcmlt, INS_sve_fcmlt}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Sve, CompareLessThanOrEqual, -1, -1, false, {INS_sve_cmple, INS_sve_cmpls, INS_sve_cmple, INS_sve_cmpls, INS_sve_cmple, INS_sve_cmpls, INS_sve_cmple, INS_sve_cmpls, INS_sve_fcmle, INS_sve_fcmle}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Sve, CompareNotEqualTo, -1, -1, false, {INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_cmpne, INS_sve_fcmne, INS_sve_fcmne}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Sve, CompareUnordered, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcmuo, INS_sve_fcmuo}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Sve, Compute16BitAddresses, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_adr, INS_invalid, INS_sve_adr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, Compute32BitAddresses, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_adr, INS_invalid, INS_sve_adr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, Compute64BitAddresses, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_adr, INS_invalid, INS_sve_adr, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
Expand Down
8 changes: 7 additions & 1 deletion src/coreclr/jit/lowerarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1453,12 +1453,18 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
var_types simdType = Compiler::getSIMDTypeForSize(simdSize);
GenTree* trueMask = comp->gtNewSimdAllTrueMaskNode(simdBaseJitType, simdSize);
GenTree* falseVal = comp->gtNewZeroConNode(simdType);
var_types nodeType = simdType;

if (HWIntrinsicInfo::ReturnsPerElementMask(node->GetHWIntrinsicId()))
{
nodeType = TYP_MASK;
}

BlockRange().InsertBefore(node, trueMask);
BlockRange().InsertBefore(node, falseVal);

GenTreeHWIntrinsic* condSelNode =
comp->gtNewSimdHWIntrinsicNode(simdType, trueMask, node, falseVal, NI_Sve_ConditionalSelect,
comp->gtNewSimdHWIntrinsicNode(nodeType, trueMask, node, falseVal, NI_Sve_ConditionalSelect,
simdBaseJitType, simdSize);
BlockRange().InsertAfter(node, condSelNode);
if (foundUse)
Expand Down
27 changes: 2 additions & 25 deletions src/coreclr/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1960,30 +1960,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
getLowVectorOperandAndCandidates(intrin, &lowVectorOperandNum, &lowVectorCandidates);
}

if ((intrin.id == NI_Sve_ConditionalSelect) && (intrin.op2->IsEmbMaskOp()) &&
(intrin.op2->isRMWHWIntrinsic(compiler)))
{
// For ConditionalSelect, if there is an embedded operation, and the operation has RMW semantics
// then record delay-free for them.
GenTreeHWIntrinsic* intrinEmbOp2 = intrin.op2->AsHWIntrinsic();
size_t numArgs = intrinEmbOp2->GetOperandCount();
assert((numArgs == 1) || (numArgs == 2));
const HWIntrinsic intrinEmb(intrinEmbOp2);
if (HWIntrinsicInfo::IsLowVectorOperation(intrinEmb.id))
{
getLowVectorOperandAndCandidates(intrinEmb, &lowVectorOperandNum, &lowVectorCandidates);
}

tgtPrefUse = BuildUse(intrinEmbOp2->Op(1));
srcCount += 1;

for (size_t argNum = 2; argNum <= numArgs; argNum++)
{
srcCount += BuildDelayFreeUses(intrinEmbOp2->Op(argNum), intrinEmbOp2->Op(1),
(argNum == lowVectorOperandNum) ? lowVectorCandidates : RBM_NONE);
}
}
else if (tgtPrefOp2)
if (tgtPrefOp2)
{
if (!intrin.op2->isContained())
{
Expand Down Expand Up @@ -2038,7 +2015,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
{
SingleTypeRegSet candidates = lowVectorOperandNum == 2 ? lowVectorCandidates : RBM_NONE;

if (intrin.op2->gtType == TYP_MASK)
if (intrin.op2->OperIsHWIntrinsic(NI_Sve_ConvertVectorToMask))
{
assert(lowVectorOperandNum != 2);
candidates = RBM_ALLMASK.GetPredicateRegSet();
Expand Down
Loading

0 comments on commit 5505150

Please sign in to comment.