Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

riscv64: Add Float SIMD lowerings for some instructions #6403

Merged
merged 7 commits into from
May 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -217,12 +217,10 @@ fn ignore(testsuite: &str, testname: &str, strategy: &str) -> bool {
"simd_boolean",
"simd_conversions",
"simd_f32x4",
"simd_f32x4_arith",
"simd_f32x4_cmp",
"simd_f32x4_pmin_pmax",
"simd_f32x4_rounding",
"simd_f64x2",
"simd_f64x2_arith",
"simd_f64x2_cmp",
"simd_f64x2_pmin_pmax",
"simd_f64x2_rounding",
Expand Down
7 changes: 5 additions & 2 deletions cranelift/codegen/src/isa/riscv64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1580,10 +1580,13 @@ impl Inst {

// Note: vs2 and vs1 here are opposite to the standard scalar ordering.
// This is noted in Section 10.1 of the RISC-V Vector spec.
match (op, vs1) {
(VecAluOpRRR::VrsubVX, vs1) if vs1 == zero_reg() => {
match (op, vs2, vs1) {
(VecAluOpRRR::VrsubVX, _, vs1) if vs1 == zero_reg() => {
format!("vneg.v {},{} {}", vd_s, vs2_s, vstate)
}
(VecAluOpRRR::VfsgnjnVV, vs2, vs1) if vs2 == vs1 => {
format!("vfneg.v {},{} {}", vd_s, vs2_s, vstate)
}
_ => format!("{} {},{},{} {}", op, vd_s, vs2_s, vs1_s, vstate),
}
}
Expand Down
46 changes: 39 additions & 7 deletions cranelift/codegen/src/isa/riscv64/inst/vector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -251,16 +251,26 @@ impl VecAluOpRRR {
pub fn funct6(&self) -> u32 {
// See: https://github.com/riscv/riscv-v-spec/blob/master/inst-table.adoc
match self {
VecAluOpRRR::VaddVV | VecAluOpRRR::VaddVX => 0b000000,
VecAluOpRRR::VsubVV | VecAluOpRRR::VsubVX => 0b000010,
VecAluOpRRR::VaddVV
| VecAluOpRRR::VaddVX
| VecAluOpRRR::VfaddVV
| VecAluOpRRR::VfaddVF => 0b000000,
VecAluOpRRR::VsubVV
| VecAluOpRRR::VsubVX
| VecAluOpRRR::VfsubVV
| VecAluOpRRR::VfsubVF => 0b000010,
VecAluOpRRR::VrsubVX => 0b000011,
VecAluOpRRR::VmulVV => 0b100101,
VecAluOpRRR::VmulhVV => 0b100111,
VecAluOpRRR::VmulhuVV => 0b100100,
VecAluOpRRR::VmulhuVV | VecAluOpRRR::VfmulVV | VecAluOpRRR::VfmulVF => 0b100100,
VecAluOpRRR::VandVV => 0b001001,
VecAluOpRRR::VorVV => 0b001010,
VecAluOpRRR::VxorVV => 0b001011,
VecAluOpRRR::VslidedownVX => 0b001111,
VecAluOpRRR::VfrsubVF => 0b100111,
VecAluOpRRR::VfdivVV | VecAluOpRRR::VfdivVF => 0b100000,
VecAluOpRRR::VfrdivVF => 0b100001,
VecAluOpRRR::VfsgnjnVV => 0b001001,
}
}

Expand All @@ -278,6 +288,17 @@ impl VecAluOpRRR {
| VecAluOpRRR::VsubVX
| VecAluOpRRR::VrsubVX
| VecAluOpRRR::VslidedownVX => VecOpCategory::OPIVX,
VecAluOpRRR::VfaddVV
| VecAluOpRRR::VfsubVV
| VecAluOpRRR::VfmulVV
| VecAluOpRRR::VfdivVV
| VecAluOpRRR::VfsgnjnVV => VecOpCategory::OPFVV,
VecAluOpRRR::VfaddVF
| VecAluOpRRR::VfsubVF
| VecAluOpRRR::VfrsubVF
| VecAluOpRRR::VfmulVF
| VecAluOpRRR::VfdivVF
| VecAluOpRRR::VfrdivVF => VecOpCategory::OPFVF,
}
}

Expand Down Expand Up @@ -360,6 +381,7 @@ impl VecAluOpRR {
VecAluOpRR::VmvSX | VecAluOpRR::VmvXS | VecAluOpRR::VfmvSF | VecAluOpRR::VfmvFS => {
0b010000
}
VecAluOpRR::VfsqrtV => 0b010011,
VecAluOpRR::VmvVV | VecAluOpRR::VmvVX | VecAluOpRR::VfmvVF => 0b010111,
}
}
Expand All @@ -369,7 +391,7 @@ impl VecAluOpRR {
VecAluOpRR::VmvSX => VecOpCategory::OPMVX,
VecAluOpRR::VmvXS => VecOpCategory::OPMVV,
VecAluOpRR::VfmvSF | VecAluOpRR::VfmvVF => VecOpCategory::OPFVF,
VecAluOpRR::VfmvFS => VecOpCategory::OPFVV,
VecAluOpRR::VfmvFS | VecAluOpRR::VfsqrtV => VecOpCategory::OPFVV,
VecAluOpRR::VmvVV => VecOpCategory::OPIVV,
VecAluOpRR::VmvVX => VecOpCategory::OPIVX,
}
Expand All @@ -386,6 +408,8 @@ impl VecAluOpRR {
VecAluOpRR::VfmvSF => 0b00000,
// VWFUNARY0
VecAluOpRR::VfmvFS => 0b00000,
// VFUNARY1
VecAluOpRR::VfsqrtV => 0b00000,
// These don't have an explicit encoding table, but Section 11.16 Vector Integer Move Instruction states:
// > The first operand specifier (vs2) must contain v0, and any other vector register number in vs2 is reserved.
VecAluOpRR::VmvVV | VecAluOpRR::VmvVX | VecAluOpRR::VfmvVF => 0,
Expand All @@ -397,7 +421,11 @@ impl VecAluOpRR {
/// other way around. As far as I can tell only vmv.v.* are backwards.
pub fn vs_is_vs2_encoded(&self) -> bool {
match self {
VecAluOpRR::VmvSX | VecAluOpRR::VmvXS | VecAluOpRR::VfmvSF | VecAluOpRR::VfmvFS => true,
VecAluOpRR::VmvSX
| VecAluOpRR::VmvXS
| VecAluOpRR::VfmvSF
| VecAluOpRR::VfmvFS
| VecAluOpRR::VfsqrtV => true,
VecAluOpRR::VmvVV | VecAluOpRR::VmvVX | VecAluOpRR::VfmvVF => false,
}
}
Expand All @@ -408,15 +436,18 @@ impl VecAluOpRR {
| VecAluOpRR::VmvSX
| VecAluOpRR::VmvVV
| VecAluOpRR::VmvVX
| VecAluOpRR::VfmvVF => RegClass::Vector,
| VecAluOpRR::VfmvVF
| VecAluOpRR::VfsqrtV => RegClass::Vector,
VecAluOpRR::VmvXS => RegClass::Int,
VecAluOpRR::VfmvFS => RegClass::Float,
}
}

pub fn src_regclass(&self) -> RegClass {
match self {
VecAluOpRR::VmvXS | VecAluOpRR::VfmvFS | VecAluOpRR::VmvVV => RegClass::Vector,
VecAluOpRR::VmvXS | VecAluOpRR::VfmvFS | VecAluOpRR::VmvVV | VecAluOpRR::VfsqrtV => {
RegClass::Vector
}
VecAluOpRR::VfmvSF | VecAluOpRR::VfmvVF => RegClass::Float,
VecAluOpRR::VmvSX | VecAluOpRR::VmvVX => RegClass::Int,
}
Expand All @@ -430,6 +461,7 @@ impl fmt::Display for VecAluOpRR {
VecAluOpRR::VmvXS => "vmv.x.s",
VecAluOpRR::VfmvSF => "vfmv.s.f",
VecAluOpRR::VfmvFS => "vfmv.f.s",
VecAluOpRR::VfsqrtV => "vfsqrt.v",
VecAluOpRR::VmvVV => "vmv.v.v",
VecAluOpRR::VmvVX => "vmv.v.x",
VecAluOpRR::VfmvVF => "vfmv.v.f",
Expand Down
79 changes: 79 additions & 0 deletions cranelift/codegen/src/isa/riscv64/inst_vector.isle
Original file line number Diff line number Diff line change
Expand Up @@ -92,12 +92,23 @@
(VandVV)
(VorVV)
(VxorVV)
(VfaddVV)
(VfsubVV)
(VfmulVV)
(VfdivVV)
(VfsgnjnVV)

;; Vector-Scalar Opcodes
(VaddVX)
(VsubVX)
(VrsubVX)
(VslidedownVX)
(VfaddVF)
(VfsubVF)
(VfrsubVF)
(VfmulVF)
(VfdivVF)
(VfrdivVF)
))

;; Register-Imm ALU Ops
Expand Down Expand Up @@ -125,6 +136,7 @@
(VmvVV)
(VmvVX)
(VfmvVF)
(VfsqrtV)
))

;; Returns the canonical destination type for a VecAluOpRRImm5.
Expand Down Expand Up @@ -307,6 +319,73 @@
(rule (rv_vxor_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.VxorVV) vs2 vs1 vstate))

;; Helper for emitting the `vfadd.vv` instruction.
(decl rv_vfadd_vv (Reg Reg VState) Reg)
(rule (rv_vfadd_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.VfaddVV) vs2 vs1 vstate))

;; Helper for emitting the `vfadd.vf` instruction.
(decl rv_vfadd_vf (Reg Reg VState) Reg)
(rule (rv_vfadd_vf vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.VfaddVF) vs2 vs1 vstate))

;; Helper for emitting the `vfsub.vv` instruction.
(decl rv_vfsub_vv (Reg Reg VState) Reg)
(rule (rv_vfsub_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.VfsubVV) vs2 vs1 vstate))

;; Helper for emitting the `vfsub.vf` instruction.
(decl rv_vfsub_vf (Reg Reg VState) Reg)
(rule (rv_vfsub_vf vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.VfsubVF) vs2 vs1 vstate))

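;; Helper for emitting the `vfrsub.vf` instruction.
;; This is the reversed form of `vfsub.vf`: the scalar in `vs1` is the minuend,
;; so each result element is `vs1 - vs2[i]`.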
(decl rv_vfrsub_vf (Reg Reg VState) Reg)
(rule (rv_vfrsub_vf vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.VfrsubVF) vs2 vs1 vstate))

;; Helper for emitting the `vfmul.vv` instruction.
(decl rv_vfmul_vv (Reg Reg VState) Reg)
(rule (rv_vfmul_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.VfmulVV) vs2 vs1 vstate))

;; Helper for emitting the `vfmul.vf` instruction.
(decl rv_vfmul_vf (Reg Reg VState) Reg)
(rule (rv_vfmul_vf vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.VfmulVF) vs2 vs1 vstate))

;; Helper for emitting the `vfdiv.vv` instruction.
(decl rv_vfdiv_vv (Reg Reg VState) Reg)
(rule (rv_vfdiv_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.VfdivVV) vs2 vs1 vstate))

;; Helper for emitting the `vfdiv.vf` instruction.
(decl rv_vfdiv_vf (Reg Reg VState) Reg)
(rule (rv_vfdiv_vf vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.VfdivVF) vs2 vs1 vstate))

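;; Helper for emitting the `vfrdiv.vf` instruction.
;; This is the reversed form of `vfdiv.vf`: the scalar in `vs1` is the dividend,
;; so each result element is `vs1 / vs2[i]`.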
(decl rv_vfrdiv_vf (Reg Reg VState) Reg)
(rule (rv_vfrdiv_vf vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.VfrdivVF) vs2 vs1 vstate))

;; Helper for emitting the `vfsgnjn.vv` ("Floating Point Sign Injection Negated") instruction.
;; The output of this instruction is `vs2` with the negated sign bit from `vs1`
(decl rv_vfsgnjn_vv (Reg Reg VState) Reg)
(rule (rv_vfsgnjn_vv vs2 vs1 vstate)
(vec_alu_rrr (VecAluOpRRR.VfsgnjnVV) vs2 vs1 vstate))

;; Helper for emitting the `vfneg.v` instruction.
;; This is a pseudo-instruction: an alias for `vfsgnjn.vv vd, vs, vs`.
(decl rv_vfneg_v (Reg VState) Reg)
(rule (rv_vfneg_v vs vstate) (rv_vfsgnjn_vv vs vs vstate))

;; Helper for emitting the `vfsqrt.v` instruction.
;; This instruction computes the floating-point square root of each element of the source vector.
(decl rv_vfsqrt_v (Reg VState) Reg)
(rule (rv_vfsqrt_v vs vstate)
(vec_alu_rr (VecAluOpRR.VfsqrtV) vs vstate))

;; Helper for emitting the `vslidedown.vx` instruction.
;; `vslidedown` moves all elements in the vector down by n elements.
;; The top most elements are up to the tail policy.
Expand Down
64 changes: 58 additions & 6 deletions cranelift/codegen/src/isa/riscv64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -584,9 +584,12 @@
(rv_fabs ty x))

;;;; Rules for `fneg` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type ty (fneg x)))
(rule 0 (lower (has_type (ty_scalar_float ty) (fneg x)))
(rv_fneg ty x))

(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fneg x)))
(rv_vfneg_v x ty))

;;;; Rules for `fcopysign` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type ty (fcopysign x y)))
(rv_fsgnj ty x y))
Expand All @@ -597,9 +600,11 @@


;;;; Rules for `sqrt` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (has_type ty (sqrt x)))
(rule 0 (lower (has_type (ty_scalar_float ty) (sqrt x)))
(rv_fsqrt ty x))

(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (sqrt x)))
(rv_vfsqrt_v x ty))

;;;; Rules for `AtomicRMW` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule -1
Expand Down Expand Up @@ -706,18 +711,65 @@


;;;;; Rules for float arithmetic
(rule (lower (has_type ty (fadd x y)))


;;;; Rules for `fadd` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule 0 (lower (has_type (ty_scalar_float ty) (fadd x y)))
(rv_fadd ty x y))

(rule (lower (has_type ty (fsub x y)))
(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fadd x y)))
(rv_vfadd_vv x y ty))

(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fadd x (splat y))))
(rv_vfadd_vf x y ty))

(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (fadd (splat x) y)))
(rv_vfadd_vf y x ty))


;;;; Rules for `fsub` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 0 (lower (has_type (ty_scalar_float ty) (fsub x y)))
(rv_fsub ty x y))

(rule (lower (has_type ty (fmul x y)))
(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fsub x y)))
(rv_vfsub_vv x y ty))

(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fsub x (splat y))))
(rv_vfsub_vf x y ty))

(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (fsub (splat x) y)))
(rv_vfrsub_vf y x ty))

;;;; Rules for `fmul` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 0 (lower (has_type (ty_scalar_float ty) (fmul x y)))
(rv_fmul ty x y))

(rule (lower (has_type ty (fdiv x y)))
(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fmul x y)))
(rv_vfmul_vv x y ty))

(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fmul x (splat y))))
(rv_vfmul_vf x y ty))

(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (fmul (splat x) y)))
(rv_vfmul_vf y x ty))


;;;; Rules for `fdiv` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule 0 (lower (has_type (ty_scalar_float ty) (fdiv x y)))
(rv_fdiv ty x y))

(rule 1 (lower (has_type (ty_vec_fits_in_register ty) (fdiv x y)))
(rv_vfdiv_vv x y ty))

(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (fdiv x (splat y))))
(rv_vfdiv_vf x y ty))

(rule 3 (lower (has_type (ty_vec_fits_in_register ty) (fdiv (splat x) y)))
(rv_vfrdiv_vf y x ty))

;;;; Rules for `fmin/fmax` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule
(lower (has_type ty (fmin x y)))
(gen_float_select (FloatSelectOP.Min) x y ty))
Expand Down
Loading