Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JIT ARM64-SVE: Add CreateWhileLessThan* #100949

Merged
merged 8 commits into from
Apr 25, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -3473,7 +3473,7 @@ class Compiler

#if defined(TARGET_ARM64)
GenTree* gtNewSimdConvertVectorToMaskNode(var_types type, GenTree* node, CorInfoType simdBaseJitType, unsigned simdSize);
GenTree* gtNewSimdConvertMaskToVectorNode(GenTreeHWIntrinsic* node, var_types type);
GenTree* gtNewSimdConvertMaskToVectorNode(var_types type, GenTreeHWIntrinsic* node, CorInfoType simdBaseJitType, unsigned simdSize);
#endif

//------------------------------------------------------------------------
Expand Down
60 changes: 37 additions & 23 deletions src/coreclr/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -697,7 +697,20 @@ unsigned HWIntrinsicInfo::lookupSimdSize(Compiler* comp, NamedIntrinsic id, CORI
}

CorInfoType simdBaseJitType = comp->getBaseJitTypeAndSizeOfSIMDType(typeHnd, &simdSize);

#if defined(TARGET_ARM64)
if (simdBaseJitType == CORINFO_TYPE_UNDEF)
{
assert(simdSize == 0); // the argument is not a vector
}
else
{
assert(simdSize > 0);
}
#else
assert((simdSize > 0) && (simdBaseJitType != CORINFO_TYPE_UNDEF));
#endif

return simdSize;
}

Expand Down Expand Up @@ -1062,49 +1075,50 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
int numArgs = sig->numArgs;
var_types retType = genActualType(JITtype2varType(sig->retType));
CorInfoType simdBaseJitType = CORINFO_TYPE_UNDEF;
CorInfoType simdRetJitType = CORINFO_TYPE_UNDEF;
GenTree* retNode = nullptr;
unsigned int simdRetSize = 0;

if (retType == TYP_STRUCT)
{
unsigned int sizeBytes;
simdBaseJitType = getBaseJitTypeAndSizeOfSIMDType(sig->retTypeSigClass, &sizeBytes);
simdRetJitType = getBaseJitTypeAndSizeOfSIMDType(sig->retTypeSigClass, &simdRetSize);

if (HWIntrinsicInfo::IsMultiReg(intrinsic))
{
assert(sizeBytes == 0);
assert(simdRetSize == 0);
}

#ifdef TARGET_ARM64
else if ((intrinsic == NI_AdvSimd_LoadAndInsertScalar) || (intrinsic == NI_AdvSimd_Arm64_LoadAndInsertScalar))
{
CorInfoType pSimdBaseJitType = CORINFO_TYPE_UNDEF;
var_types retFieldType = impNormStructType(sig->retTypeSigClass, &pSimdBaseJitType);
CorInfoType pSimdRetJitType = CORINFO_TYPE_UNDEF;
var_types retFieldType = impNormStructType(sig->retTypeSigClass, &pSimdRetJitType);

if (retFieldType == TYP_STRUCT)
{
CORINFO_CLASS_HANDLE structType;
unsigned int sizeBytes = 0;
unsigned int simdRetSize = 0;

// LoadAndInsertScalar that returns 2,3 or 4 vectors
assert(pSimdBaseJitType == CORINFO_TYPE_UNDEF);
assert(pSimdRetJitType == CORINFO_TYPE_UNDEF);
unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(sig->retTypeSigClass);
assert(fieldCount > 1);
CORINFO_FIELD_HANDLE fieldHandle = info.compCompHnd->getFieldInClass(sig->retTypeClass, 0);
CorInfoType fieldType = info.compCompHnd->getFieldType(fieldHandle, &structType);
simdBaseJitType = getBaseJitTypeAndSizeOfSIMDType(structType, &sizeBytes);
simdRetJitType = getBaseJitTypeAndSizeOfSIMDType(structType, &simdRetSize);
switch (fieldCount)
{
case 2:
intrinsic = sizeBytes == 8 ? NI_AdvSimd_LoadAndInsertScalarVector64x2
: NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x2;
intrinsic = simdRetSize == 8 ? NI_AdvSimd_LoadAndInsertScalarVector64x2
: NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x2;
break;
case 3:
intrinsic = sizeBytes == 8 ? NI_AdvSimd_LoadAndInsertScalarVector64x3
: NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3;
intrinsic = simdRetSize == 8 ? NI_AdvSimd_LoadAndInsertScalarVector64x3
: NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3;
break;
case 4:
intrinsic = sizeBytes == 8 ? NI_AdvSimd_LoadAndInsertScalarVector64x4
: NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4;
intrinsic = simdRetSize == 8 ? NI_AdvSimd_LoadAndInsertScalarVector64x4
: NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4;
break;
default:
assert("unsupported");
Expand All @@ -1113,26 +1127,26 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
else
{
assert((retFieldType == TYP_SIMD8) || (retFieldType == TYP_SIMD16));
assert(isSupportedBaseType(intrinsic, simdBaseJitType));
retType = getSIMDTypeForSize(sizeBytes);
assert(isSupportedBaseType(intrinsic, simdRetJitType));
retType = getSIMDTypeForSize(simdRetSize);
}
}
#endif
else
{
// We want to return early here for cases where retType was TYP_STRUCT as per method signature and
// rather than deferring the decision after getting the simdBaseJitType of arg.
if (!isSupportedBaseType(intrinsic, simdBaseJitType))
// rather than deferring the decision after getting the simdRetJitType of arg.
if (!isSupportedBaseType(intrinsic, simdRetJitType))
{
return nullptr;
}

assert(sizeBytes != 0);
retType = getSIMDTypeForSize(sizeBytes);
assert(simdRetSize != 0);
retType = getSIMDTypeForSize(simdRetSize);
}
}

simdBaseJitType = getBaseJitTypeFromArgIfNeeded(intrinsic, clsHnd, sig, simdBaseJitType);
simdBaseJitType = getBaseJitTypeFromArgIfNeeded(intrinsic, clsHnd, sig, simdRetJitType);

if (simdBaseJitType == CORINFO_TYPE_UNDEF)
{
Expand Down Expand Up @@ -1381,7 +1395,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
}

#if defined(TARGET_ARM64)
if ((simdSize != 8) && (simdSize != 16))
if ((simdSize != 8) && (simdSize != 16) && (simdSize != 0))
a74nh marked this conversation as resolved.
Show resolved Hide resolved
#elif defined(TARGET_XARCH)
if ((simdSize != 16) && (simdSize != 32) && (simdSize != 64))
#endif // TARGET_*
Expand Down Expand Up @@ -1607,7 +1621,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
// HWIntrinsic returns a mask, but all returns must be vectors, so convert mask to vector.
assert(HWIntrinsicInfo::ReturnsPerElementMask(intrinsic));
assert(nodeRetType == TYP_MASK);
retNode = gtNewSimdConvertMaskToVectorNode(retNode->AsHWIntrinsic(), retType);
retNode = gtNewSimdConvertMaskToVectorNode(retType, retNode->AsHWIntrinsic(), simdRetJitType, simdRetSize);
}
#endif // defined(TARGET_ARM64)

Expand Down
20 changes: 12 additions & 8 deletions src/coreclr/jit/hwintrinsicarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2204,12 +2204,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
}

//------------------------------------------------------------------------
// gtNewSimdConvertMaskToVectorNode: Convert a HW intrinsic vector node to a mask
// gtNewSimdConvertVectorToMaskNode: Convert a HW intrinsic vector node to a mask
//
// Arguments:
// node -- The node to convert
// simdBaseJitType -- the base jit type of the converted node
// simdSize -- the simd size of the converted node
// simdBaseJitType -- The base jit type of the converted node
// simdSize -- The simd size of the converted node
//
// Return Value:
// The node converted to a mask type
Expand All @@ -2231,19 +2231,23 @@ GenTree* Compiler::gtNewSimdConvertVectorToMaskNode(var_types type,
// gtNewSimdConvertMaskToVectorNode: Convert a HW intrinsic mask node to a vector
//
// Arguments:
// node -- The node to convert
// type -- The type of the node to convert to
// node -- The node to convert
// type -- The type of the node to convert to
// simdBaseJitType -- The base jit type of node to convert to
// simdSize -- The simd size of the node to convert to
//
// Return Value:
// The node converted to the given type
//
GenTree* Compiler::gtNewSimdConvertMaskToVectorNode(GenTreeHWIntrinsic* node, var_types type)
GenTree* Compiler::gtNewSimdConvertMaskToVectorNode(var_types type,
GenTreeHWIntrinsic* node,
CorInfoType simdBaseJitType,
unsigned simdSize)
{
assert(varTypeIsMask(node));
assert(varTypeIsSIMD(type));

return gtNewSimdHWIntrinsicNode(type, node, NI_Sve_ConvertMaskToVector, node->GetSimdBaseJitType(),
node->GetSimdSize());
return gtNewSimdHWIntrinsicNode(type, node, NI_Sve_ConvertMaskToVector, simdBaseJitType, simdSize);
}

#endif // FEATURE_HW_INTRINSICS
36 changes: 36 additions & 0 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1300,6 +1300,42 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
GetEmitter()->emitIns_R_PATTERN(ins, emitSize, targetReg, opt, SVE_PATTERN_ALL);
break;

case NI_Sve_CreateWhileLessThanMask8Bit:
case NI_Sve_CreateWhileLessThanOrEqualMask8Bit:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this going to be a more common pattern? Should we have a way to make it more table driven?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The problem is that we need to know the name of the intrinsic in order to know the opt value (e.g. INS_OPTS_SCALABLE_H).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we not tracking the vector type anywhere? We have a couple fields (simdBaseJitType and altType for example) so we should be able to track both the overload type (int vs uint vs long vs ulong) and the vector base type/size

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Switched the code so that it uses the return vector type as the basetype, and set auxiliary type to arg1 type. That simplifies the code a lot and removes many of my changes.

// Emit size is the size of the scalar operands.
emitSize = emitActualTypeSize(intrin.op1->TypeGet());
// opt is based on the size of the returned vector
opt = INS_OPTS_SCALABLE_B;
GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt);
break;

case NI_Sve_CreateWhileLessThanMask16Bit:
case NI_Sve_CreateWhileLessThanOrEqualMask16Bit:
// Emit size is the size of the scalar operands.
emitSize = emitActualTypeSize(intrin.op1->TypeGet());
// opt is based on the size of the returned vector
opt = INS_OPTS_SCALABLE_H;
GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt);
break;

case NI_Sve_CreateWhileLessThanMask32Bit:
case NI_Sve_CreateWhileLessThanOrEqualMask32Bit:
// Emit size is the size of the scalar operands.
emitSize = emitActualTypeSize(intrin.op1->TypeGet());
// opt is based on the size of the returned vector
opt = INS_OPTS_SCALABLE_S;
GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt);
break;

case NI_Sve_CreateWhileLessThanMask64Bit:
case NI_Sve_CreateWhileLessThanOrEqualMask64Bit:
// Emit size is the size of the scalar operands.
emitSize = emitActualTypeSize(intrin.op1->TypeGet());
// opt is based on the size of the returned vector
opt = INS_OPTS_SCALABLE_D;
GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt);
break;

default:
unreached();
}
Expand Down
8 changes: 8 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,14 @@ HARDWARE_INTRINSIC(Sve, CreateTrueMaskSingle,
HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt16, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt32, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt64, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask16Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_sve_whilelo, INS_sve_whilelt, INS_sve_whilelo, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask32Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_sve_whilelo, INS_sve_whilelt, INS_sve_whilelo, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask64Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_sve_whilelo, INS_sve_whilelt, INS_sve_whilelo, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask8Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_sve_whilelo, INS_sve_whilelt, INS_sve_whilelo, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask16Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_sve_whilels, INS_sve_whilele, INS_sve_whilels, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask32Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_sve_whilels, INS_sve_whilele, INS_sve_whilels, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask64Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_sve_whilels, INS_sve_whilele, INS_sve_whilels, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask)
HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask8Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_sve_whilels, INS_sve_whilele, INS_sve_whilels, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask)

HARDWARE_INTRINSIC(Sve, LoadVector, -1, 2, true, {INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_LowMaskedOperation)

Expand Down
Loading
Loading