Skip to content

Commit

Permalink
[RISCV] Fix the cost of llvm.vector.reduce.and (#119160)
Browse files Browse the repository at this point in the history
This patch also adds CodeGen test cases for the vector reduction
intrinsics. For consistency, cases for related intrinsics such as
`vector.reduce.or` are included as well.

For the case where a reduction over the `v1i1` type generates `VFIRST`, see:
https://reviews.llvm.org/D139512.
  • Loading branch information
sunshaoce authored Jan 10, 2025
1 parent 41e4018 commit 369c617
Show file tree
Hide file tree
Showing 5 changed files with 279 additions and 24 deletions.
24 changes: 20 additions & 4 deletions llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1536,15 +1536,31 @@ RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
std::pair<InstructionCost, MVT> LT = getTypeLegalizationCost(Ty);
Type *ElementTy = Ty->getElementType();
if (ElementTy->isIntegerTy(1)) {
// Example sequences:
// vfirst.m a0, v0
// seqz a0, a0
if (LT.second == MVT::v1i1)
return getRISCVInstructionCost(RISCV::VFIRST_M, LT.second, CostKind) +
getCmpSelInstrCost(Instruction::ICmp, ElementTy, ElementTy,
CmpInst::ICMP_EQ, CostKind);

if (ISD == ISD::AND) {
// Example sequences:
// vsetvli a0, zero, e8, mf8, ta, ma
// vmand.mm v8, v9, v8 ; needed every time type is split
// vmnot.m v8, v0
// vmnot.m v8, v0 ; alias for vmnand
// vcpop.m a0, v8
// seqz a0, a0
return LT.first * getRISCVInstructionCost(RISCV::VMNAND_MM, LT.second,
CostKind) +

// See the discussion: https://github.com/llvm/llvm-project/pull/119160
// For LMUL <= 8, there is no splitting,
// the sequences are vmnot, vcpop and seqz.
// When LMUL > 8 and split = 1,
// the sequences are vmnand, vcpop and seqz.
// When LMUL > 8 and split > 1,
// the sequences are (LT.first-2) * vmand, vmnand, vcpop and seqz.
return ((LT.first > 2) ? (LT.first - 2) : 0) *
getRISCVInstructionCost(RISCV::VMAND_MM, LT.second, CostKind) +
getRISCVInstructionCost(RISCV::VMNAND_MM, LT.second, CostKind) +
getRISCVInstructionCost(RISCV::VCPOP_M, LT.second, CostKind) +
getCmpSelInstrCost(Instruction::ICmp, ElementTy, ElementTy,
CmpInst::ICMP_EQ, CostKind);
Expand Down
239 changes: 239 additions & 0 deletions llvm/test/Analysis/CostModel/RISCV/reduce-and-i1.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -mtriple=riscv32 -mattr=+v,+zvl128b -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output \
; RUN: | FileCheck %s --check-prefixes=THROUGHPUT,THROUGHPUT-VL128B
; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zvl128b -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output \
; RUN: | FileCheck %s --check-prefixes=THROUGHPUT,THROUGHPUT-VL128B
; RUN: opt < %s -mtriple=riscv32 -mattr=+v,+zvl256b -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output \
; RUN: | FileCheck %s --check-prefixes=THROUGHPUT,THROUGHPUT-VL256B
; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zvl256b -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output \
; RUN: | FileCheck %s --check-prefixes=THROUGHPUT,THROUGHPUT-VL256B
; RUN: opt < %s -mtriple=riscv32 -mattr=+v,+zvl512b -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output \
; RUN: | FileCheck %s --check-prefixes=THROUGHPUT,THROUGHPUT-VL512B
; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zvl512b -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output \
; RUN: | FileCheck %s --check-prefixes=THROUGHPUT,THROUGHPUT-VL512B
; RUN: opt < %s -mtriple=riscv32 -mattr=+v,+zvl1024b -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output \
; RUN: | FileCheck %s --check-prefixes=THROUGHPUT,THROUGHPUT-VL1024B
; RUN: opt < %s -mtriple=riscv64 -mattr=+v,+zvl1024b -passes="print<cost-model>" -cost-kind=throughput 2>&1 -disable-output \
; RUN: | FileCheck %s --check-prefixes=THROUGHPUT,THROUGHPUT-VL1024B

; Cost-model test for llvm.vector.reduce.and on a fixed-length <1 x i1> vector.
; The autogenerated checks below use the check prefix shared by every
; configuration in this file and expect a cost of 2 for the reduction call
; and a cost of 0 for the ret.
define zeroext i1 @vreduce_and_v1i1(<1 x i1> %v) {
; THROUGHPUT-LABEL: 'vreduce_and_v1i1'
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %red = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %v)
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
;
%red = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %v)
ret i1 %red
}

; Cost-model test for llvm.vector.reduce.and on a fixed-length <2 x i1> vector.
; The autogenerated checks below use the check prefix shared by every
; configuration in this file and expect a cost of 3 for the reduction call
; and a cost of 0 for the ret.
define zeroext i1 @vreduce_and_v2i1(<2 x i1> %v) {
; THROUGHPUT-LABEL: 'vreduce_and_v2i1'
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %v)
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
;
%red = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %v)
ret i1 %red
}

; Cost-model test for llvm.vector.reduce.and on a fixed-length <4 x i1> vector.
; The autogenerated checks below use the check prefix shared by every
; configuration in this file and expect a cost of 3 for the reduction call
; and a cost of 0 for the ret.
define zeroext i1 @vreduce_and_v4i1(<4 x i1> %v) {
; THROUGHPUT-LABEL: 'vreduce_and_v4i1'
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %v)
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
;
%red = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %v)
ret i1 %red
}

; Cost-model test for llvm.vector.reduce.and on a fixed-length <8 x i1> vector.
; The autogenerated checks below use the check prefix shared by every
; configuration in this file and expect a cost of 3 for the reduction call
; and a cost of 0 for the ret.
define zeroext i1 @vreduce_and_v8i1(<8 x i1> %v) {
; THROUGHPUT-LABEL: 'vreduce_and_v8i1'
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %v)
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
;
%red = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %v)
ret i1 %red
}

; Cost-model test for llvm.vector.reduce.and on a fixed-length <16 x i1> vector.
; The autogenerated checks below use the check prefix shared by every
; configuration in this file and expect a cost of 3 for the reduction call
; and a cost of 0 for the ret.
define zeroext i1 @vreduce_and_v16i1(<16 x i1> %v) {
; THROUGHPUT-LABEL: 'vreduce_and_v16i1'
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %v)
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
;
%red = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %v)
ret i1 %red
}

; Cost-model test for llvm.vector.reduce.and on a fixed-length <32 x i1> vector.
; The autogenerated checks below use the check prefix shared by every
; configuration in this file and expect a cost of 3 for the reduction call
; and a cost of 0 for the ret.
define zeroext i1 @vreduce_and_v32i1(<32 x i1> %v) {
; THROUGHPUT-LABEL: 'vreduce_and_v32i1'
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %v)
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
;
%red = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %v)
ret i1 %red
}

; Cost-model test for llvm.vector.reduce.and on a fixed-length <64 x i1> vector.
; The autogenerated checks below use the check prefix shared by every
; configuration in this file and expect a cost of 3 for the reduction call
; and a cost of 0 for the ret.
define zeroext i1 @vreduce_and_v64i1(<64 x i1> %v) {
; THROUGHPUT-LABEL: 'vreduce_and_v64i1'
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> %v)
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
;
%red = call i1 @llvm.vector.reduce.and.v64i1(<64 x i1> %v)
ret i1 %red
}

; Cost-model test for llvm.vector.reduce.and on a fixed-length <128 x i1>
; vector. The autogenerated checks below use the check prefix shared by every
; configuration in this file and expect a cost of 3 for the reduction call
; and a cost of 0 for the ret.
define zeroext i1 @vreduce_and_v128i1(<128 x i1> %v) {
; THROUGHPUT-LABEL: 'vreduce_and_v128i1'
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> %v)
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
;
%red = call i1 @llvm.vector.reduce.and.v128i1(<128 x i1> %v)
ret i1 %red
}

; Cost-model test for llvm.vector.reduce.and on a fixed-length <256 x i1>
; vector. The autogenerated checks below use the check prefix shared by every
; configuration in this file and expect a cost of 3 for the reduction call
; and a cost of 0 for the ret.
define zeroext i1 @vreduce_and_v256i1(<256 x i1> %v) {
; THROUGHPUT-LABEL: 'vreduce_and_v256i1'
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> %v)
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
;
%red = call i1 @llvm.vector.reduce.and.v256i1(<256 x i1> %v)
ret i1 %red
}

; Cost-model test for llvm.vector.reduce.and on a fixed-length <512 x i1>
; vector. This is the first case in the file whose expected cost depends on
; the zvl* attribute, so the checks are split per VLEN-specific prefix:
; the reduction call is expected to cost 5 with zvl128b and 3 with zvl256b,
; zvl512b and zvl1024b. The ret is expected to cost 0 in every configuration.
define zeroext i1 @vreduce_and_v512i1(<512 x i1> %v) {
; THROUGHPUT-VL128B-LABEL: 'vreduce_and_v512i1'
; THROUGHPUT-VL128B-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> %v)
; THROUGHPUT-VL128B-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
;
; THROUGHPUT-VL256B-LABEL: 'vreduce_and_v512i1'
; THROUGHPUT-VL256B-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> %v)
; THROUGHPUT-VL256B-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
;
; THROUGHPUT-VL512B-LABEL: 'vreduce_and_v512i1'
; THROUGHPUT-VL512B-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> %v)
; THROUGHPUT-VL512B-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
;
; THROUGHPUT-VL1024B-LABEL: 'vreduce_and_v512i1'
; THROUGHPUT-VL1024B-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> %v)
; THROUGHPUT-VL1024B-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
;
%red = call i1 @llvm.vector.reduce.and.v512i1(<512 x i1> %v)
ret i1 %red
}

; Cost-model test for llvm.vector.reduce.and on a fixed-length <1024 x i1>
; vector. The expected cost depends on the zvl* attribute, so the checks are
; split per VLEN-specific prefix: the reduction call is expected to cost 9
; with zvl128b, 5 with zvl256b, and 3 with zvl512b and zvl1024b. The ret is
; expected to cost 0 in every configuration.
define zeroext i1 @vreduce_and_v1024i1(<1024 x i1> %v) {
; THROUGHPUT-VL128B-LABEL: 'vreduce_and_v1024i1'
; THROUGHPUT-VL128B-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %red = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> %v)
; THROUGHPUT-VL128B-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
;
; THROUGHPUT-VL256B-LABEL: 'vreduce_and_v1024i1'
; THROUGHPUT-VL256B-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> %v)
; THROUGHPUT-VL256B-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
;
; THROUGHPUT-VL512B-LABEL: 'vreduce_and_v1024i1'
; THROUGHPUT-VL512B-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> %v)
; THROUGHPUT-VL512B-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
;
; THROUGHPUT-VL1024B-LABEL: 'vreduce_and_v1024i1'
; THROUGHPUT-VL1024B-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> %v)
; THROUGHPUT-VL1024B-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
;
%red = call i1 @llvm.vector.reduce.and.v1024i1(<1024 x i1> %v)
ret i1 %red
}

; Cost-model test for llvm.vector.reduce.and on a scalable
; <vscale x 1 x i1> vector. The autogenerated checks below use the check
; prefix shared by every configuration in this file and expect a cost of 3
; for the reduction call and a cost of 0 for the ret.
define zeroext i1 @vreduce_and_nxv1i1(<vscale x 1 x i1> %v) {
; THROUGHPUT-LABEL: 'vreduce_and_nxv1i1'
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv1i1(<vscale x 1 x i1> %v)
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
;
%red = call i1 @llvm.vector.reduce.and.nxv1i1(<vscale x 1 x i1> %v)
ret i1 %red
}

; Cost-model test for llvm.vector.reduce.and on a scalable
; <vscale x 2 x i1> vector. The autogenerated checks below use the check
; prefix shared by every configuration in this file and expect a cost of 3
; for the reduction call and a cost of 0 for the ret.
define zeroext i1 @vreduce_and_nxv2i1(<vscale x 2 x i1> %v) {
; THROUGHPUT-LABEL: 'vreduce_and_nxv2i1'
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv2i1(<vscale x 2 x i1> %v)
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
;
%red = call i1 @llvm.vector.reduce.and.nxv2i1(<vscale x 2 x i1> %v)
ret i1 %red
}

; Cost-model test for llvm.vector.reduce.and on a scalable
; <vscale x 4 x i1> vector. The autogenerated checks below use the check
; prefix shared by every configuration in this file and expect a cost of 3
; for the reduction call and a cost of 0 for the ret.
define zeroext i1 @vreduce_and_nxv4i1(<vscale x 4 x i1> %v) {
; THROUGHPUT-LABEL: 'vreduce_and_nxv4i1'
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv4i1(<vscale x 4 x i1> %v)
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
;
%red = call i1 @llvm.vector.reduce.and.nxv4i1(<vscale x 4 x i1> %v)
ret i1 %red
}

; Cost-model test for llvm.vector.reduce.and on a scalable
; <vscale x 8 x i1> vector. The autogenerated checks below use the check
; prefix shared by every configuration in this file and expect a cost of 3
; for the reduction call and a cost of 0 for the ret.
define zeroext i1 @vreduce_and_nxv8i1(<vscale x 8 x i1> %v) {
; THROUGHPUT-LABEL: 'vreduce_and_nxv8i1'
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv8i1(<vscale x 8 x i1> %v)
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
;
%red = call i1 @llvm.vector.reduce.and.nxv8i1(<vscale x 8 x i1> %v)
ret i1 %red
}

; Cost-model test for llvm.vector.reduce.and on a scalable
; <vscale x 16 x i1> vector. The autogenerated checks below use the check
; prefix shared by every configuration in this file and expect a cost of 3
; for the reduction call and a cost of 0 for the ret.
define zeroext i1 @vreduce_and_nxv16i1(<vscale x 16 x i1> %v) {
; THROUGHPUT-LABEL: 'vreduce_and_nxv16i1'
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv16i1(<vscale x 16 x i1> %v)
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
;
%red = call i1 @llvm.vector.reduce.and.nxv16i1(<vscale x 16 x i1> %v)
ret i1 %red
}

; Cost-model test for llvm.vector.reduce.and on a scalable
; <vscale x 32 x i1> vector. The autogenerated checks below use the check
; prefix shared by every configuration in this file and expect a cost of 3
; for the reduction call and a cost of 0 for the ret.
define zeroext i1 @vreduce_and_nxv32i1(<vscale x 32 x i1> %v) {
; THROUGHPUT-LABEL: 'vreduce_and_nxv32i1'
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv32i1(<vscale x 32 x i1> %v)
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
;
%red = call i1 @llvm.vector.reduce.and.nxv32i1(<vscale x 32 x i1> %v)
ret i1 %red
}

; Cost-model test for llvm.vector.reduce.and on a scalable
; <vscale x 64 x i1> vector. The autogenerated checks below use the check
; prefix shared by every configuration in this file and expect a cost of 3
; for the reduction call and a cost of 0 for the ret.
define zeroext i1 @vreduce_and_nxv64i1(<vscale x 64 x i1> %v) {
; THROUGHPUT-LABEL: 'vreduce_and_nxv64i1'
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv64i1(<vscale x 64 x i1> %v)
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
;
%red = call i1 @llvm.vector.reduce.and.nxv64i1(<vscale x 64 x i1> %v)
ret i1 %red
}

; Cost-model test for llvm.vector.reduce.and on a scalable
; <vscale x 128 x i1> vector. The autogenerated checks below use the check
; prefix shared by every configuration in this file and expect a cost of 3
; for the reduction call and a cost of 0 for the ret.
define zeroext i1 @vreduce_and_nxv128i1(<vscale x 128 x i1> %v) {
; THROUGHPUT-LABEL: 'vreduce_and_nxv128i1'
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv128i1(<vscale x 128 x i1> %v)
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
;
%red = call i1 @llvm.vector.reduce.and.nxv128i1(<vscale x 128 x i1> %v)
ret i1 %red
}

; Cost-model test for llvm.vector.reduce.and on a scalable
; <vscale x 256 x i1> vector. This is the first scalable case in the file
; whose expected reduction cost rises above 3: the checks below (shared check
; prefix, so identical for every configuration) expect a cost of 5 for the
; reduction call and a cost of 0 for the ret.
define zeroext i1 @vreduce_and_nxv256i1(<vscale x 256 x i1> %v) {
; THROUGHPUT-LABEL: 'vreduce_and_nxv256i1'
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv256i1(<vscale x 256 x i1> %v)
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
;
%red = call i1 @llvm.vector.reduce.and.nxv256i1(<vscale x 256 x i1> %v)
ret i1 %red
}

; Cost-model test for llvm.vector.reduce.and on a scalable
; <vscale x 512 x i1> vector. The autogenerated checks below use the check
; prefix shared by every configuration in this file and expect a cost of 9
; for the reduction call and a cost of 0 for the ret.
define zeroext i1 @vreduce_and_nxv512i1(<vscale x 512 x i1> %v) {
; THROUGHPUT-LABEL: 'vreduce_and_nxv512i1'
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv512i1(<vscale x 512 x i1> %v)
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
;
%red = call i1 @llvm.vector.reduce.and.nxv512i1(<vscale x 512 x i1> %v)
ret i1 %red
}

; Cost-model test for llvm.vector.reduce.and on a scalable
; <vscale x 1024 x i1> vector, the widest case in this file. The
; autogenerated checks below use the check prefix shared by every
; configuration in this file and expect a cost of 17 for the reduction call
; and a cost of 0 for the ret.
define zeroext i1 @vreduce_and_nxv1024i1(<vscale x 1024 x i1> %v) {
; THROUGHPUT-LABEL: 'vreduce_and_nxv1024i1'
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %red = call i1 @llvm.vector.reduce.and.nxv1024i1(<vscale x 1024 x i1> %v)
; THROUGHPUT-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i1 %red
;
%red = call i1 @llvm.vector.reduce.and.nxv1024i1(<vscale x 1024 x i1> %v)
ret i1 %red
}
Loading

0 comments on commit 369c617

Please sign in to comment.