Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

riscv64: Implement SIMD swizzle and shuffle #6515

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 17 additions & 1 deletion cranelift/codegen/src/isa/riscv64/inst_vector.isle
Original file line number Diff line number Diff line change
Expand Up @@ -779,4 +779,20 @@
;; Materialize the mask into an X register, and move it into the bottom of
;; the vector register.
(rule (gen_vec_mask mask)
(rv_vmv_sx (imm $I64 mask) (vstate_from_type $I64X2)))
(rv_vmv_sx (imm $I64 mask) (vstate_from_type $I64X2)))


;; Loads a `VCodeConstant` value into a vector register and returns that register.
;;
;; For some special `VCodeConstant`s we can use a dedicated instruction, otherwise
;; we load the value from the constant pool.
;;
;; Arguments:
;;   * `Type`: the preferred type to use when loading the constant.
;;   * `VCodeConstant`: the constant to materialize.
(decl gen_constant (Type VCodeConstant) VReg)

;; The fallback case is to load the constant from the pool: an unmasked, unit-stride
;; vector load of type `ty`, through the addressing mode that `gen_const_amode`
;; builds for `n`, with the element width derived from `ty`.
(rule (gen_constant ty n)
(vec_load
(element_width_from_type ty)
(VecAMode.UnitStride (gen_const_amode n))
(mem_flags_trusted)
(unmasked)
ty))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nitpick: missing trailing newline

22 changes: 16 additions & 6 deletions cranelift/codegen/src/isa/riscv64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,7 @@
;; ;;;; Rules for `vconst` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type (ty_vec_fits_in_register ty) (vconst n)))
(vec_load
(element_width_from_type ty)
(VecAMode.UnitStride (gen_const_amode (const_to_vconst n)))
(mem_flags_trusted)
(unmasked)
ty))
(gen_constant ty (const_to_vconst n)))

;;;; Rules for `f32const` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

Expand Down Expand Up @@ -1418,3 +1413,18 @@

;; NOTE(review): presumably `replicated_uimm5` only matches an index vector whose
;; lanes all hold the same 5-bit unsigned immediate, so the immediate form of
;; `vrgather` can be used without materializing the index vector -- confirm against
;; the extractor's definition.
(rule 2 (lower (has_type (ty_vec_fits_in_register ty) (swizzle x (replicated_uimm5 y))))
(rv_vrgather_vi x y (unmasked) ty))

;;;; Rules for `shuffle` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Lowers an `i8x16` `shuffle` whose lane selector is an immediate.
;;
;; The selector is loaded as a vector of byte indexes (`x_mask`). Indexes 0-15 select
;; lanes of `x` and indexes 16-31 select lanes of `y`:
;;
;;   1. A `vrgather` with `x_mask` loads all 0-15 lanes from `x`.
;;   2. Adding -16 to every index (`y_mask`) rebases indexes 16-31 to 0-15, so a
;;      second `vrgather` loads all 16-31 lanes from `y`.
;;   3. A `vor` combines the two partial vectors.
;;
;; vrgather will insert a 0 for lanes that are out of bounds, so we can let it load
;; negative and out of bounds indexes: each result lane is then expected to be
;; non-zero in at most one of the two gathers.
;;
;; NOTE(review): the RVV spec defines "out of bounds" for vrgather as index >= VLMAX,
;; not index >= 16. The reasoning above therefore appears to assume VLMAX == 16 for
;; e8/m1 (i.e. VLEN == 128) -- confirm how targets with a larger VLEN are handled.
(rule (lower (has_type (ty_vec_fits_in_register ty @ $I8X16) (shuffle x y (vconst_from_immediate mask))))
(if-let neg16 (imm5_from_i8 -16))
(let ((x_mask VReg (gen_constant ty mask))
(x_lanes VReg (rv_vrgather_vv x x_mask (unmasked) ty))
(y_mask VReg (rv_vadd_vi x_mask neg16 (unmasked) ty))
(y_lanes VReg (rv_vrgather_vv y y_mask (unmasked) ty)))
(rv_vor_vv x_lanes y_lanes (unmasked) ty)))
7 changes: 7 additions & 0 deletions cranelift/codegen/src/machinst/isle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -342,6 +342,13 @@ macro_rules! isle_lower_prelude_methods {
Some(u128::from_le_bytes(bytes.try_into().ok()?))
}

/// Implements the ISLE `vconst_from_immediate` extractor.
///
/// Takes a copy of the data stored for `imm` and registers that copy with the
/// lowering context as a `VCodeConstantData::Generated` constant, returning the
/// resulting `VCodeConstant`. This extractor never fails: the result is always
/// `Some`.
#[inline]
fn vconst_from_immediate(&mut self, imm: Immediate) -> Option<VCodeConstant> {
    // `Generated` is handed an owned clone of the immediate's data.
    let data = self.lower_ctx.get_immediate_data(imm).clone();
    let constant = self
        .lower_ctx
        .use_constant(VCodeConstantData::Generated(data));
    Some(constant)
}

#[inline]
fn vec_mask_from_immediate(&mut self, imm: Immediate) -> Option<VecMask> {
let data = self.lower_ctx.get_immediate_data(imm);
Expand Down
5 changes: 5 additions & 0 deletions cranelift/codegen/src/prelude_lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -854,6 +854,11 @@
(decl u128_from_immediate (u128) Immediate)
(extern extractor u128_from_immediate u128_from_immediate)

;; Extracts an `Immediate` as a `VCodeConstant`.
;;
;; The immediate's data is registered as a generated constant, so the resulting
;; `VCodeConstant` can be used wherever a constant is expected.

(decl vconst_from_immediate (VCodeConstant) Immediate)
(extern extractor vconst_from_immediate vconst_from_immediate)

;; Accessor for `Constant` as u128.

(decl u128_from_constant (u128) Constant)
Expand Down
61 changes: 61 additions & 0 deletions cranelift/filetests/filetests/isa/riscv64/simd-shuffle.clif
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
test compile precise-output
set unwind_info=false
target riscv64 has_v

function %shuffle_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
v2 = shuffle v0, v1, [3 0 31 26 4 6 12 11 23 13 24 4 2 15 17 5]
return v2
}

; VCode:
; add sp,-16
; sd ra,8(sp)
; sd fp,0(sp)
; mv fp,sp
; block0:
; vle8.v v1,16(fp) #avl=16, #vtype=(e8, m1, ta, ma)
; vle8.v v3,32(fp) #avl=16, #vtype=(e8, m1, ta, ma)
; vle8.v v6,[const(0)] #avl=16, #vtype=(e8, m1, ta, ma)
; vrgather.vv v8,v1,v6 #avl=16, #vtype=(e8, m1, ta, ma)
; vadd.vi v10,v6,-16 #avl=16, #vtype=(e8, m1, ta, ma)
; vrgather.vv v12,v3,v10 #avl=16, #vtype=(e8, m1, ta, ma)
; vor.vv v14,v8,v12 #avl=16, #vtype=(e8, m1, ta, ma)
; vse8.v v14,0(a0) #avl=16, #vtype=(e8, m1, ta, ma)
; ld ra,8(sp)
; ld fp,0(sp)
; add sp,+16
; ret
;
; Disassembled:
; block0: ; offset 0x0
; addi sp, sp, -0x10
; sd ra, 8(sp)
; sd s0, 0(sp)
; ori s0, sp, 0
; block1: ; offset 0x10
; .byte 0x57, 0x70, 0x08, 0xcc
; addi t6, s0, 0x10
; .byte 0x87, 0x80, 0x0f, 0x02
; addi t6, s0, 0x20
; .byte 0x87, 0x81, 0x0f, 0x02
; auipc t6, 0
; addi t6, t6, 0x3c
; .byte 0x07, 0x83, 0x0f, 0x02
; .byte 0x57, 0x04, 0x13, 0x32
; .byte 0x57, 0x35, 0x68, 0x02
; .byte 0x57, 0x06, 0x35, 0x32
; .byte 0x57, 0x07, 0x86, 0x2a
; .byte 0x27, 0x07, 0x05, 0x02
; ld ra, 8(sp)
; ld s0, 0(sp)
; addi sp, sp, 0x10
; ret
; .byte 0x00, 0x00, 0x00, 0x00
; .byte 0x00, 0x00, 0x00, 0x00
; .byte 0x00, 0x00, 0x00, 0x00
; lb zero, 0x1a1(t5)
; .byte 0x04, 0x06, 0x0c, 0x0b
; auipc s10, 0x4180
; .byte 0x02, 0x0f, 0x11, 0x05

1 change: 1 addition & 0 deletions cranelift/filetests/filetests/runtests/simd-shuffle.clif
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ set enable_simd
target x86_64 has_sse3 has_ssse3 has_sse41
target x86_64 has_sse3 has_ssse3 has_sse41 has_avx
target x86_64 has_sse3 has_ssse3 has_sse41 has_avx512vl has_avx512vbmi
target riscv64gc has_v

function %shuffle_i8x16(i8x16, i8x16) -> i8x16 {
block0(v0: i8x16, v1: i8x16):
Expand Down