Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RISCV][CostModel] VPIntrinsics have same cost as their non-vp counterparts #67178

Merged
merged 7 commits into from
Oct 5, 2023
56 changes: 56 additions & 0 deletions llvm/include/llvm/CodeGen/BasicTTIImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1687,6 +1687,62 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
}
}

// VP Intrinsics should have the same cost as their non-vp counterpart.
// TODO: Adjust the cost to make the vp intrinsic cheaper than its non-vp
// counterpart when the vector length argument is smaller than the maximum
// vector length.
if (VPIntrinsic::isVPIntrinsic(ICA.getID())) {
std::optional<unsigned> FOp =
VPIntrinsic::getFunctionalOpcodeForVP(ICA.getID());
if (FOp) {
// TODO: Support other kinds of Intrinsics (i.e. reductions)
if (ICA.getID() == Intrinsic::vp_load) {
Align Alignment;
if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
Alignment = VPI->getPointerAlignment().valueOrOne();
unsigned AS = 0;
if (ICA.getArgs().size() > 1)
if (auto *PtrTy =
dyn_cast<PointerType>(ICA.getArgs()[0]->getType()))
AS = PtrTy->getAddressSpace();
return thisT()->getMemoryOpCost(*FOp, ICA.getReturnType(), Alignment,
AS, CostKind);
}
if (ICA.getID() == Intrinsic::vp_store) {
Align Alignment;
if (auto *VPI = dyn_cast_or_null<VPIntrinsic>(ICA.getInst()))
Alignment = VPI->getPointerAlignment().valueOrOne();
unsigned AS = 0;
if (ICA.getArgs().size() >= 2)
if (auto *PtrTy =
dyn_cast<PointerType>(ICA.getArgs()[1]->getType()))
AS = PtrTy->getAddressSpace();
return thisT()->getMemoryOpCost(*FOp, Args[0]->getType(), Alignment,
AS, CostKind);
}
if (VPBinOpIntrinsic::isVPBinOp(ICA.getID())) {
return thisT()->getArithmeticInstrCost(*FOp, ICA.getReturnType(),
CostKind);
}
}

std::optional<Intrinsic::ID> FID =
VPIntrinsic::getFunctionalIntrinsicIDForVP(ICA.getID());
if (FID) {
// Non-vp version will have same Args/Tys except mask and vector length.
assert(ICA.getArgs().size() >= 2 && ICA.getArgTypes().size() >= 2 &&
"Expected VPIntrinsic to have Mask and Vector Length args and "
"types");
ArrayRef<const Value *> NewArgs = ArrayRef(ICA.getArgs()).drop_back(2);
ArrayRef<Type *> NewTys = ArrayRef(ICA.getArgTypes()).drop_back(2);

IntrinsicCostAttributes NewICA(*FID, ICA.getReturnType(), NewArgs,
NewTys, ICA.getFlags(), ICA.getInst(),
ICA.getScalarizationCost());
return thisT()->getIntrinsicInstrCost(NewICA, CostKind);
}
}

// Assume that we need to scalarize this intrinsic.
// Compute the scalarization overhead based on Args for a vector
// intrinsic.
Expand Down
8 changes: 4 additions & 4 deletions llvm/test/Analysis/CostModel/RISCV/gep.ll
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ define void @non_foldable_vector_uses(ptr %base, <2 x ptr> %base.vec) {
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = getelementptr i8, ptr %base, i32 42
; RVI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %x4 = call <2 x i8> @llvm.masked.expandload.v2i8(ptr %4, <2 x i1> undef, <2 x i8> undef)
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = getelementptr i8, ptr %base, i32 42
; RVI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %x5 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %5, <2 x i1> undef, i32 undef)
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %x5 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %5, <2 x i1> undef, i32 undef)
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = getelementptr i8, ptr %base, i32 42
; RVI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %x6 = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr %6, i64 undef, <2 x i1> undef, i32 undef)
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = getelementptr i8, ptr %base, i32 42
Expand All @@ -282,7 +282,7 @@ define void @non_foldable_vector_uses(ptr %base, <2 x ptr> %base.vec) {
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = getelementptr i8, ptr %base, i32 42
; RVI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v2i8(<2 x i8> undef, ptr %10, <2 x i1> undef)
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = getelementptr i8, ptr %base, i32 42
; RVI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.vp.store.v2i8.p0(<2 x i8> undef, ptr %11, <2 x i1> undef, i32 undef)
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.v2i8.p0(<2 x i8> undef, ptr %11, <2 x i1> undef, i32 undef)
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = getelementptr i8, ptr %base, i32 42
; RVI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vp.strided.store.v2i8.p0.i64(<2 x i8> undef, ptr %12, i64 undef, <2 x i1> undef, i32 undef)
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
Expand Down Expand Up @@ -340,7 +340,7 @@ define void @foldable_vector_uses(ptr %base, <2 x ptr> %base.vec) {
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %4 = getelementptr i8, ptr %base, i32 0
; RVI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %x4 = call <2 x i8> @llvm.masked.expandload.v2i8(ptr %4, <2 x i1> undef, <2 x i8> undef)
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %5 = getelementptr i8, ptr %base, i32 0
; RVI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %x5 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %5, <2 x i1> undef, i32 undef)
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %x5 = call <2 x i8> @llvm.vp.load.v2i8.p0(ptr %5, <2 x i1> undef, i32 undef)
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %6 = getelementptr i8, ptr %base, i32 0
; RVI-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %x6 = call <2 x i8> @llvm.experimental.vp.strided.load.v2i8.p0.i64(ptr %6, i64 undef, <2 x i1> undef, i32 undef)
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %7 = getelementptr i8, ptr %base, i32 0
Expand All @@ -352,7 +352,7 @@ define void @foldable_vector_uses(ptr %base, <2 x ptr> %base.vec) {
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %10 = getelementptr i8, ptr %base, i32 0
; RVI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v2i8(<2 x i8> undef, ptr %10, <2 x i1> undef)
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %11 = getelementptr i8, ptr %base, i32 0
; RVI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.vp.store.v2i8.p0(<2 x i8> undef, ptr %11, <2 x i1> undef, i32 undef)
; RVI-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.vp.store.v2i8.p0(<2 x i8> undef, ptr %11, <2 x i1> undef, i32 undef)
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %12 = getelementptr i8, ptr %base, i32 0
; RVI-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.experimental.vp.strided.store.v2i8.p0.i64(<2 x i8> undef, ptr %12, i64 undef, <2 x i1> undef, i32 undef)
; RVI-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
Expand Down
Loading