[RISCV] Allow non-power-of-2 vectors for VLS code generation
SLP supports non-power-of-2 vectors [1], so we should support them for RISC-V
vector code generation as well. Non-power-of-2 VLS vectors are a natural fit
for the vector extension, since VL imposes no constraint on the element count.

In theory we could support any length, but we want to keep the number of MVTs
from growing too quickly, so for now we only add v3, v5, v7, and v15.

[1] llvm#77790
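The element-count restriction above can be summarized in a small standalone C++ sketch. The helper name is hypothetical and only mirrors the check this patch adds to useRVVForFixedLengthVectorVT:

#include <cstdio>

// Hypothetical helper mirroring the new check: a fixed-length (VLS) vector
// type is considered for RVV lowering only if its element count is a power of
// two or one of the whitelisted non-power-of-2 lengths 3, 5, 7, or 15.
static bool isSupportedVLSElementCount(unsigned NumElems) {
  bool IsPow2 = NumElems != 0 && (NumElems & (NumElems - 1)) == 0;
  return IsPow2 || NumElems == 3 || NumElems == 5 || NumElems == 7 ||
         NumElems == 15;
}

int main() {
  unsigned Tests[] = {2, 3, 5, 6, 7, 15, 16};
  for (unsigned N : Tests)
    std::printf("%2u elements -> %s\n", N,
                isSupportedVLSElementCount(N) ? "considered" : "rejected");
  return 0;
}

With these inputs, 6 is rejected while 3, 5, 7, and 15 are accepted alongside the power-of-2 lengths, matching the list above.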
kito-cheng committed Jun 28, 2024
1 parent 08e9653 commit c59d3ac
Showing 30 changed files with 696 additions and 1,232 deletions.
2 changes: 0 additions & 2 deletions llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -2183,8 +2183,6 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
[[maybe_unused]] bool ExactlyVecRegSized =
Subtarget->expandVScale(SubVecVT.getSizeInBits())
.isKnownMultipleOf(Subtarget->expandVScale(VecRegSize));
assert(isPowerOf2_64(Subtarget->expandVScale(SubVecVT.getSizeInBits())
.getKnownMinValue()));
assert(Idx == 0 && (ExactlyVecRegSized || V.isUndef()));
}
MVT ContainerVT = VT;
39 changes: 30 additions & 9 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -2644,9 +2644,14 @@ static bool useRVVForFixedLengthVectorVT(MVT VT,
if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
return false;

// TODO: Perhaps an artificial restriction, but worth having whilst getting
// the base fixed length RVV support in place.
if (!VT.isPow2VectorType())
// Only support non-power-of-2 fixed length vector types with lengths 3, 5, 7,
// or 15.
// In theory, we could support any length, but we want to prevent the
// number of MVTs from growing too quickly. Therefore, we only add these
// specific types.
unsigned NumElems = VT.getVectorNumElements();
if (!VT.isPow2VectorType() && NumElems != 3 && NumElems != 5 &&
NumElems != 7 && NumElems != 15)
return false;

return true;
@@ -2683,10 +2688,14 @@ static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
// We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
// narrower types. The smallest fractional LMUL we support is 8/ELEN. Within
// each fractional LMUL we support SEW between 8 and LMUL*ELEN.
unsigned NumVLSElts = VT.getVectorNumElements();
if (!isPowerOf2_32(NumVLSElts))
NumVLSElts = llvm::NextPowerOf2(NumVLSElts);

unsigned NumElts =
(VT.getVectorNumElements() * RISCV::RVVBitsPerBlock) / MinVLen;
(NumVLSElts * RISCV::RVVBitsPerBlock) / MinVLen;
NumElts = std::max(NumElts, RISCV::RVVBitsPerBlock / MaxELen);
assert(isPowerOf2_32(NumElts) && "Expected power of 2 NumElts");

return MVT::getScalableVectorVT(EltVT, NumElts);
}
}
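A minimal sketch of the container computation above, assuming MinVLen = 128, MaxELen = 64, and RISCV::RVVBitsPerBlock = 64 (illustrative constants; the real code queries the subtarget):

#include <cstdio>

// Round a fixed-length element count up to the next power of 2, as the patch
// now does before choosing a scalable container type.
static unsigned nextPowerOf2(unsigned V) {
  unsigned P = 1;
  while (P < V)
    P <<= 1;
  return P;
}

// Number of elements in the scalable container, following the formula in
// getContainerForFixedLengthVector under the assumptions stated above.
static unsigned containerNumElts(unsigned FixedNumElts) {
  const unsigned MinVLen = 128, MaxELen = 64, RVVBitsPerBlock = 64;
  unsigned NumVLSElts = nextPowerOf2(FixedNumElts);
  unsigned NumElts = (NumVLSElts * RVVBitsPerBlock) / MinVLen;
  unsigned MinElts = RVVBitsPerBlock / MaxELen;
  return NumElts > MinElts ? NumElts : MinElts;
}

int main() {
  // v3 rounds up to 4 elements: 4 * 64 / 128 = 2, i.e. an nxv2 container.
  std::printf("v3  -> nxv%u\n", containerNumElts(3));
  // v15 rounds up to 16 elements: 16 * 64 / 128 = 8, i.e. an nxv8 container.
  std::printf("v15 -> nxv%u\n", containerNumElts(15));
  return 0;
}

Under these assumptions a <15 x i64> value lands in an LMUL=8 container, which is consistent with the m8, VL=15 vsetivli sequences in the updated vp_bitreverse_v15i64 test below.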
@@ -3628,6 +3637,8 @@ static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
// XLenVT if we're producing a v8i1. This results in more consistent
// codegen across RV32 and RV64.
unsigned NumViaIntegerBits = std::clamp(NumElts, 8u, Subtarget.getXLen());
if (!isPowerOf2_32(NumViaIntegerBits))
NumViaIntegerBits = llvm::NextPowerOf2(NumViaIntegerBits);
NumViaIntegerBits = std::min(NumViaIntegerBits, Subtarget.getELen());
// If we have to use more than one INSERT_VECTOR_ELT then this
// optimization is likely to increase code size; avoid performing it in
@@ -3671,10 +3682,16 @@ static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG,
// If we're producing a smaller vector than our minimum legal integer
// type, bitcast to the equivalent (known-legal) mask type, and extract
// our final mask.
assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
Vec = DAG.getBitcast(MVT::v8i1, Vec);
Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
DAG.getConstant(0, DL, XLenVT));
if (IntegerViaVecVT == MVT::v1i8) {
assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
Vec = DAG.getBitcast(MVT::v8i1, Vec);
Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
DAG.getConstant(0, DL, XLenVT));
} else if (IntegerViaVecVT == MVT::v1i16) {
Vec = DAG.getBitcast(MVT::v16i1, Vec);
Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
DAG.getConstant(0, DL, XLenVT));
}
} else {
// Else we must have produced an integer type with the same size as the
// mask type; bitcast for the final result.
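The new v1i16 path can be pictured with a purely illustrative program (no LLVM APIs): a <15 x i1> constant is folded into one 16-bit scalar, which the lowering then reinterprets as sixteen mask bits and narrows back to fifteen elements via EXTRACT_SUBVECTOR:

#include <cstdint>
#include <cstdio>

int main() {
  // An arbitrary <15 x i1> constant mask, element 0 first.
  bool Mask[15] = {1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0};

  // Build-vector-of-constants step: fold the i1 elements into a single
  // 16-bit scalar, with bit I holding element I.
  uint16_t Packed = 0;
  for (unsigned I = 0; I < 15; ++I)
    Packed |= static_cast<uint16_t>(Mask[I]) << I;

  // Bitcast-and-extract step: treat the scalar as 16 mask bits and keep only
  // the low 15, recovering the original constant.
  for (unsigned I = 0; I < 15; ++I)
    std::printf("element %2u = %d\n", I, (Packed >> I) & 1);
  return 0;
}

Bit I of the packed scalar corresponds to mask element I, which is why a bitcast from v1i16 to v16i1 followed by an extract at index 0 reproduces the original mask.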
@@ -4827,6 +4844,10 @@ static bool isLegalBitRotate(ShuffleVectorSDNode *SVN,

EVT VT = SVN->getValueType(0);
unsigned NumElts = VT.getVectorNumElements();
// We don't handle non-power-of-2 here.
if (!isPowerOf2_32(NumElts))
return false;

unsigned EltSizeInBits = VT.getScalarSizeInBits();
unsigned NumSubElts;
if (!ShuffleVectorInst::isBitRotateMask(SVN->getMask(), EltSizeInBits, 2,
196 changes: 89 additions & 107 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-bitreverse-vp.ll
@@ -1649,28 +1649,16 @@ declare <15 x i64> @llvm.vp.bitreverse.v15i64(<15 x i64>, <15 x i1>, i32)
define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroext %evl) {
; RV32-LABEL: vp_bitreverse_v15i64:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -48
; RV32-NEXT: .cfi_def_cfa_offset 48
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: li a2, 24
; RV32-NEXT: mul a1, a1, a2
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 24 * vlenb
; RV32-NEXT: sw zero, 20(sp)
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x18, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 24 * vlenb
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 16(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: sw a1, 40(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: sw a1, 32(sp)
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vx v16, v8, a1, v0.t
@@ -1683,21 +1671,21 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex
; RV32-NEXT: csrr a4, vlenb
; RV32-NEXT: slli a4, a4, 4
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 48
; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: addi a4, sp, 16
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: addi a4, sp, 8
; RV32-NEXT: vsetivli zero, 15, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a4), zero
; RV32-NEXT: csrr a4, vlenb
; RV32-NEXT: slli a4, a4, 3
; RV32-NEXT: add a4, sp, a4
; RV32-NEXT: addi a4, a4, 48
; RV32-NEXT: addi a4, a4, 16
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: lui a4, 4080
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vx v24, v8, a4, v0.t
; RV32-NEXT: vsll.vi v24, v24, 24, v0.t
; RV32-NEXT: addi a5, sp, 48
; RV32-NEXT: addi a5, sp, 16
; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill
; RV32-NEXT: vand.vv v24, v8, v16, v0.t
; RV32-NEXT: vsll.vi v16, v24, 8, v0.t
@@ -1706,62 +1694,65 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex
; RV32-NEXT: csrr a5, vlenb
; RV32-NEXT: slli a5, a5, 4
; RV32-NEXT: add a5, sp, a5
; RV32-NEXT: addi a5, a5, 48
; RV32-NEXT: addi a5, a5, 16
; RV32-NEXT: vl8r.v v24, (a5) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: csrr a5, vlenb
; RV32-NEXT: slli a5, a5, 4
; RV32-NEXT: add a5, sp, a5
; RV32-NEXT: addi a5, a5, 48
; RV32-NEXT: addi a5, a5, 16
; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
; RV32-NEXT: vsrl.vx v16, v8, a1, v0.t
; RV32-NEXT: vsrl.vx v24, v8, a3, v0.t
; RV32-NEXT: vand.vx v24, v24, a2, v0.t
; RV32-NEXT: vor.vv v16, v24, v16, v0.t
; RV32-NEXT: addi a1, sp, 48
; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: vs8r.v v16, (a1) # Unknown-size Folded Spill
; RV32-NEXT: vsrl.vi v24, v8, 24, v0.t
; RV32-NEXT: vand.vx v24, v24, a4, v0.t
; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 48
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vand.vv v8, v8, v16, v0.t
; RV32-NEXT: vor.vv v8, v8, v24, v0.t
; RV32-NEXT: addi a1, sp, 40
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a1), zero
; RV32-NEXT: addi a1, sp, 48
; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 4
; RV32-NEXT: add a1, sp, a1
; RV32-NEXT: addi a1, a1, 48
; RV32-NEXT: addi a1, a1, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v16, v16, v8, v0.t
; RV32-NEXT: vsrl.vi v8, v16, 4, v0.t
; RV32-NEXT: vand.vv v8, v8, v24, v0.t
; RV32-NEXT: vand.vv v16, v16, v24, v0.t
; RV32-NEXT: addi a1, sp, 32
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a1), zero
; RV32-NEXT: vor.vv v8, v16, v8, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 4, v0.t
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v24, a1
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vi v16, v16, 4, v0.t
; RV32-NEXT: vor.vv v16, v8, v16, v0.t
; RV32-NEXT: vsrl.vi v8, v16, 2, v0.t
; RV32-NEXT: vand.vv v8, v8, v24, v0.t
; RV32-NEXT: vand.vv v16, v16, v24, v0.t
; RV32-NEXT: addi a1, sp, 24
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a1), zero
; RV32-NEXT: vand.vv v8, v8, v24, v0.t
; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
; RV32-NEXT: vor.vv v8, v16, v8, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 2, v0.t
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v24, a1
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vi v16, v16, 2, v0.t
; RV32-NEXT: vor.vv v8, v8, v16, v0.t
; RV32-NEXT: vand.vv v16, v16, v24, v0.t
; RV32-NEXT: vand.vv v8, v8, v24, v0.t
; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
; RV32-NEXT: vor.vv v8, v16, v8, v0.t
; RV32-NEXT: vsrl.vi v16, v8, 1, v0.t
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v24, a1
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v16, v24, v0.t
; RV32-NEXT: vand.vv v8, v8, v24, v0.t
; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
@@ -1770,7 +1761,7 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex
; RV32-NEXT: li a1, 24
; RV32-NEXT: mul a0, a0, a1
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: addi sp, sp, 48
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vp_bitreverse_v15i64:
@@ -1856,27 +1847,15 @@ define <15 x i64> @vp_bitreverse_v15i64(<15 x i64> %va, <15 x i1> %m, i32 zeroex
define <15 x i64> @vp_bitreverse_v15i64_unmasked(<15 x i64> %va, i32 zeroext %evl) {
; RV32-LABEL: vp_bitreverse_v15i64_unmasked:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -48
; RV32-NEXT: .cfi_def_cfa_offset 48
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: csrr a1, vlenb
; RV32-NEXT: slli a1, a1, 3
; RV32-NEXT: sub sp, sp, a1
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x30, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 48 + 8 * vlenb
; RV32-NEXT: sw zero, 20(sp)
; RV32-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 16 + 8 * vlenb
; RV32-NEXT: sw zero, 12(sp)
; RV32-NEXT: lui a1, 1044480
; RV32-NEXT: sw a1, 16(sp)
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
; RV32-NEXT: sw a1, 44(sp)
; RV32-NEXT: sw a1, 40(sp)
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: sw a1, 36(sp)
; RV32-NEXT: sw a1, 32(sp)
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: sw a1, 28(sp)
; RV32-NEXT: sw a1, 24(sp)
; RV32-NEXT: sw a1, 8(sp)
; RV32-NEXT: li a1, 56
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vsll.vx v16, v8, a1
@@ -1886,66 +1865,69 @@ define <15 x i64> @vp_bitreverse_v15i64_unmasked(<15 x i64> %va, i32 zeroext %ev
; RV32-NEXT: li a3, 40
; RV32-NEXT: vsll.vx v24, v24, a3
; RV32-NEXT: vor.vv v16, v16, v24
; RV32-NEXT: addi a4, sp, 48
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: addi a4, sp, 16
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v24, (a4), zero
; RV32-NEXT: vs8r.v v16, (a4) # Unknown-size Folded Spill
; RV32-NEXT: addi a4, sp, 8
; RV32-NEXT: vsetivli zero, 15, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a4), zero
; RV32-NEXT: lui a4, 4080
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vx v0, v8, a4
; RV32-NEXT: vsll.vi v0, v0, 24
; RV32-NEXT: vand.vv v16, v8, v24
; RV32-NEXT: vsll.vi v16, v16, 8
; RV32-NEXT: vor.vv v16, v0, v16
; RV32-NEXT: addi a5, sp, 48
; RV32-NEXT: vand.vv v24, v8, v16
; RV32-NEXT: vsll.vi v24, v24, 8
; RV32-NEXT: vor.vv v24, v0, v24
; RV32-NEXT: addi a5, sp, 16
; RV32-NEXT: vl8r.v v0, (a5) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v16, v0, v16
; RV32-NEXT: vs8r.v v16, (a5) # Unknown-size Folded Spill
; RV32-NEXT: vor.vv v24, v0, v24
; RV32-NEXT: vs8r.v v24, (a5) # Unknown-size Folded Spill
; RV32-NEXT: vsrl.vx v0, v8, a3
; RV32-NEXT: vand.vx v0, v0, a2
; RV32-NEXT: vsrl.vx v16, v8, a1
; RV32-NEXT: vor.vv v0, v0, v16
; RV32-NEXT: vsrl.vi v16, v8, 8
; RV32-NEXT: vand.vv v16, v16, v24
; RV32-NEXT: vsrl.vx v24, v8, a1
; RV32-NEXT: vor.vv v24, v0, v24
; RV32-NEXT: vsrl.vi v0, v8, 8
; RV32-NEXT: vand.vv v16, v0, v16
; RV32-NEXT: vsrl.vi v8, v8, 24
; RV32-NEXT: vand.vx v8, v8, a4
; RV32-NEXT: vor.vv v8, v16, v8
; RV32-NEXT: addi a1, sp, 40
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a1), zero
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vor.vv v8, v8, v0
; RV32-NEXT: addi a1, sp, 48
; RV32-NEXT: vl8r.v v24, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v8, v24, v8
; RV32-NEXT: vsrl.vi v24, v8, 4
; RV32-NEXT: vand.vv v24, v24, v16
; RV32-NEXT: vand.vv v8, v8, v16
; RV32-NEXT: addi a1, sp, 32
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a1), zero
; RV32-NEXT: vor.vv v8, v8, v24
; RV32-NEXT: addi a1, sp, 16
; RV32-NEXT: vl8r.v v16, (a1) # Unknown-size Folded Reload
; RV32-NEXT: vor.vv v8, v16, v8
; RV32-NEXT: vsrl.vi v16, v8, 4
; RV32-NEXT: lui a1, 61681
; RV32-NEXT: addi a1, a1, -241
; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v24, a1
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v16, v24
; RV32-NEXT: vand.vv v8, v8, v24
; RV32-NEXT: vsll.vi v8, v8, 4
; RV32-NEXT: vor.vv v8, v24, v8
; RV32-NEXT: vsrl.vi v24, v8, 2
; RV32-NEXT: vand.vv v24, v24, v16
; RV32-NEXT: vand.vv v8, v8, v16
; RV32-NEXT: addi a1, sp, 24
; RV32-NEXT: vsetivli zero, 16, e64, m8, ta, ma
; RV32-NEXT: vlse64.v v16, (a1), zero
; RV32-NEXT: vor.vv v8, v16, v8
; RV32-NEXT: vsrl.vi v16, v8, 2
; RV32-NEXT: lui a1, 209715
; RV32-NEXT: addi a1, a1, 819
; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v24, a1
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v16, v24
; RV32-NEXT: vand.vv v8, v8, v24
; RV32-NEXT: vsll.vi v8, v8, 2
; RV32-NEXT: vor.vv v8, v24, v8
; RV32-NEXT: vsrl.vi v24, v8, 1
; RV32-NEXT: vand.vv v24, v24, v16
; RV32-NEXT: vand.vv v8, v8, v16
; RV32-NEXT: vor.vv v8, v16, v8
; RV32-NEXT: vsrl.vi v16, v8, 1
; RV32-NEXT: lui a1, 349525
; RV32-NEXT: addi a1, a1, 1365
; RV32-NEXT: vsetivli zero, 30, e32, m8, ta, ma
; RV32-NEXT: vmv.v.x v24, a1
; RV32-NEXT: vsetvli zero, a0, e64, m8, ta, ma
; RV32-NEXT: vand.vv v16, v16, v24
; RV32-NEXT: vand.vv v8, v8, v24
; RV32-NEXT: vadd.vv v8, v8, v8
; RV32-NEXT: vor.vv v8, v24, v8
; RV32-NEXT: vor.vv v8, v16, v8
; RV32-NEXT: csrr a0, vlenb
; RV32-NEXT: slli a0, a0, 3
; RV32-NEXT: add sp, sp, a0
; RV32-NEXT: addi sp, sp, 48
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: ret
;
; RV64-LABEL: vp_bitreverse_v15i64_unmasked: