Skip to content

Commit

Permalink
Port widening ops to ISLE (AArch64) (#4751)
Browse files Browse the repository at this point in the history
Ported the existing implementations of the following opcodes for AArch64
to ISLE, and implemented support for 64-bit vectors (per the docs):
- `SwidenLow`
- `SwidenHigh`
- `UwidenLow`
- `UwidenHigh`

Also ported `WideningPairwiseDotProductS` as-is.

Copyright (c) 2022 Arm Limited
  • Loading branch information
dheaton-arm authored Aug 23, 2022
1 parent da1fb30 commit 3b68d76
Show file tree
Hide file tree
Showing 10 changed files with 250 additions and 161 deletions.
32 changes: 19 additions & 13 deletions cranelift/codegen/src/isa/aarch64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -528,7 +528,8 @@
(t VecExtendOp)
(rd WritableReg)
(rn Reg)
(high_half bool))
(high_half bool)
(lane_size ScalarSize))

;; Move vector element to another vector element.
(VecMovElement
Expand Down Expand Up @@ -1080,18 +1081,10 @@
;; Type of vector element extensions.
(type VecExtendOp
(enum
;; Signed extension of 8-bit elements
(Sxtl8)
;; Signed extension of 16-bit elements
(Sxtl16)
;; Signed extension of 32-bit elements
(Sxtl32)
;; Unsigned extension of 8-bit elements
(Uxtl8)
;; Unsigned extension of 16-bit elements
(Uxtl16)
;; Unsigned extension of 32-bit elements
(Uxtl32)
;; Signed extension
(Sxtl)
;; Unsigned extension
(Uxtl)
))

;; A vector ALU operation.
Expand Down Expand Up @@ -1844,6 +1837,12 @@
(_ Unit (emit (MInst.MovFromVecSigned dst rn idx size scalar_size))))
dst))

(decl fpu_move_from_vec (Reg u8 VectorSize) Reg)
(rule (fpu_move_from_vec rn idx size)
(let ((dst WritableReg (temp_writable_reg $I8X16))
(_ Unit (emit (MInst.FpuMoveFromVec dst rn idx size))))
dst))

;; Helper for emitting `MInst.Extend` instructions.
(decl extend (Reg bool u8 u8) Reg)
(rule (extend rn signed from_bits to_bits)
Expand All @@ -1858,6 +1857,13 @@
(_ Unit (emit (MInst.FpuExtend dst src size))))
dst))

;; Helper for emitting `MInst.VecExtend` instructions.
(decl vec_extend (VecExtendOp Reg bool ScalarSize) Reg)
(rule (vec_extend op src high_half size)
(let ((dst WritableReg (temp_writable_reg $I8X16))
(_ Unit (emit (MInst.VecExtend op dst src high_half size))))
dst))

;; Helper for emitting `MInst.LoadAcquire` instructions.
(decl load_acquire (Type Reg) Reg)
(rule (load_acquire ty addr)
Expand Down
10 changes: 10 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst/args.rs
Original file line number Diff line number Diff line change
Expand Up @@ -653,6 +653,16 @@ impl ScalarSize {
ScalarSize::Size128 => panic!("can't widen 128-bits"),
}
}

pub fn narrow(&self) -> ScalarSize {
match self {
ScalarSize::Size8 => panic!("can't narrow 8-bits"),
ScalarSize::Size16 => ScalarSize::Size8,
ScalarSize::Size32 => ScalarSize::Size16,
ScalarSize::Size64 => ScalarSize::Size32,
ScalarSize::Size128 => ScalarSize::Size64,
}
}
}

/// Type used to communicate the size of a vector operand.
Expand Down
17 changes: 10 additions & 7 deletions cranelift/codegen/src/isa/aarch64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2382,16 +2382,19 @@ impl MachInstEmit for Inst {
rd,
rn,
high_half,
lane_size,
} => {
let rd = allocs.next_writable(rd);
let rn = allocs.next(rn);
let (u, immh) = match t {
VecExtendOp::Sxtl8 => (0b0, 0b001),
VecExtendOp::Sxtl16 => (0b0, 0b010),
VecExtendOp::Sxtl32 => (0b0, 0b100),
VecExtendOp::Uxtl8 => (0b1, 0b001),
VecExtendOp::Uxtl16 => (0b1, 0b010),
VecExtendOp::Uxtl32 => (0b1, 0b100),
let immh = match lane_size {
ScalarSize::Size16 => 0b001,
ScalarSize::Size32 => 0b010,
ScalarSize::Size64 => 0b100,
_ => panic!("Unexpected VecExtend to lane size of {:?}", lane_size),
};
let u = match t {
VecExtendOp::Sxtl => 0b0,
VecExtendOp::Uxtl => 0b1,
};
sink.put4(
0b000_011110_0000_000_101001_00000_00000
Expand Down
18 changes: 12 additions & 6 deletions cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2581,60 +2581,66 @@ fn test_aarch64_binemit() {
));
insns.push((
Inst::VecExtend {
t: VecExtendOp::Sxtl8,
t: VecExtendOp::Sxtl,
rd: writable_vreg(4),
rn: vreg(27),
high_half: false,
lane_size: ScalarSize::Size16,
},
"64A7080F",
"sxtl v4.8h, v27.8b",
));
insns.push((
Inst::VecExtend {
t: VecExtendOp::Sxtl16,
t: VecExtendOp::Sxtl,
rd: writable_vreg(17),
rn: vreg(19),
high_half: true,
lane_size: ScalarSize::Size32,
},
"71A6104F",
"sxtl2 v17.4s, v19.8h",
));
insns.push((
Inst::VecExtend {
t: VecExtendOp::Sxtl32,
t: VecExtendOp::Sxtl,
rd: writable_vreg(30),
rn: vreg(6),
high_half: false,
lane_size: ScalarSize::Size64,
},
"DEA4200F",
"sxtl v30.2d, v6.2s",
));
insns.push((
Inst::VecExtend {
t: VecExtendOp::Uxtl8,
t: VecExtendOp::Uxtl,
rd: writable_vreg(3),
rn: vreg(29),
high_half: true,
lane_size: ScalarSize::Size16,
},
"A3A7086F",
"uxtl2 v3.8h, v29.16b",
));
insns.push((
Inst::VecExtend {
t: VecExtendOp::Uxtl16,
t: VecExtendOp::Uxtl,
rd: writable_vreg(15),
rn: vreg(12),
high_half: false,
lane_size: ScalarSize::Size32,
},
"8FA5102F",
"uxtl v15.4s, v12.4h",
));
insns.push((
Inst::VecExtend {
t: VecExtendOp::Uxtl32,
t: VecExtendOp::Uxtl,
rd: writable_vreg(28),
rn: vreg(2),
high_half: true,
lane_size: ScalarSize::Size64,
},
"5CA4206F",
"uxtl2 v28.2d, v2.4s",
Expand Down
50 changes: 11 additions & 39 deletions cranelift/codegen/src/isa/aarch64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2041,47 +2041,19 @@ impl Inst {
rd,
rn,
high_half,
lane_size,
} => {
let (op, dest, src) = match (t, high_half) {
(VecExtendOp::Sxtl8, false) => {
("sxtl", VectorSize::Size16x8, VectorSize::Size8x8)
}
(VecExtendOp::Sxtl8, true) => {
("sxtl2", VectorSize::Size16x8, VectorSize::Size8x16)
}
(VecExtendOp::Sxtl16, false) => {
("sxtl", VectorSize::Size32x4, VectorSize::Size16x4)
}
(VecExtendOp::Sxtl16, true) => {
("sxtl2", VectorSize::Size32x4, VectorSize::Size16x8)
}
(VecExtendOp::Sxtl32, false) => {
("sxtl", VectorSize::Size64x2, VectorSize::Size32x2)
}
(VecExtendOp::Sxtl32, true) => {
("sxtl2", VectorSize::Size64x2, VectorSize::Size32x4)
}
(VecExtendOp::Uxtl8, false) => {
("uxtl", VectorSize::Size16x8, VectorSize::Size8x8)
}
(VecExtendOp::Uxtl8, true) => {
("uxtl2", VectorSize::Size16x8, VectorSize::Size8x16)
}
(VecExtendOp::Uxtl16, false) => {
("uxtl", VectorSize::Size32x4, VectorSize::Size16x4)
}
(VecExtendOp::Uxtl16, true) => {
("uxtl2", VectorSize::Size32x4, VectorSize::Size16x8)
}
(VecExtendOp::Uxtl32, false) => {
("uxtl", VectorSize::Size64x2, VectorSize::Size32x2)
}
(VecExtendOp::Uxtl32, true) => {
("uxtl2", VectorSize::Size64x2, VectorSize::Size32x4)
}
let vec64 = VectorSize::from_lane_size(lane_size.narrow(), false);
let vec128 = VectorSize::from_lane_size(lane_size.narrow(), true);
let rd_size = VectorSize::from_lane_size(lane_size, true);
let (op, rn_size) = match (t, high_half) {
(VecExtendOp::Sxtl, false) => ("sxtl", vec64),
(VecExtendOp::Sxtl, true) => ("sxtl2", vec128),
(VecExtendOp::Uxtl, false) => ("uxtl", vec64),
(VecExtendOp::Uxtl, true) => ("uxtl2", vec128),
};
let rd = pretty_print_vreg_vector(rd.to_reg(), dest, allocs);
let rn = pretty_print_vreg_vector(rn, src, allocs);
let rd = pretty_print_vreg_vector(rd.to_reg(), rd_size, allocs);
let rn = pretty_print_vreg_vector(rn, rn_size, allocs);
format!("{} {}, {}", op, rd, rn)
}
&Inst::VecMovElement {
Expand Down
42 changes: 42 additions & 0 deletions cranelift/codegen/src/isa/aarch64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1817,6 +1817,48 @@
(result Reg (uqxtn2 low_half y (lane_size ty))))
result))

;;;; Rules for `swiden_low` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type ty (swiden_low x)))
(vec_extend (VecExtendOp.Sxtl) x $false (lane_size ty)))

;;;; Rules for `swiden_high` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type (ty_vec128 ty) (swiden_high x)))
(vec_extend (VecExtendOp.Sxtl) x $true (lane_size ty)))

(rule (lower (has_type ty (swiden_high x)))
(if (ty_vec64 ty))
(let ((tmp Reg (fpu_move_from_vec x 1 (VectorSize.Size32x2))))
(vec_extend (VecExtendOp.Sxtl) tmp $false (lane_size ty))))

;;;; Rules for `uwiden_low` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type ty (uwiden_low x)))
(vec_extend (VecExtendOp.Uxtl) x $false (lane_size ty)))

;;;; Rules for `uwiden_high` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type (ty_vec128 ty) (uwiden_high x)))
(vec_extend (VecExtendOp.Uxtl) x $true (lane_size ty)))

(rule (lower (has_type ty (uwiden_high x)))
(if (ty_vec64 ty))
(let ((tmp Reg (fpu_move_from_vec x 1 (VectorSize.Size32x2))))
(vec_extend (VecExtendOp.Uxtl) tmp $false (lane_size ty))))

;;;; Rules for `widening_pairwise_dot_product_s` ;;;;;;;;;;;;;;;;;;;;;;

;; The args have type I16X8.
;; "dst = i32x4.dot_i16x8_s(x, y)"
;; => smull tmp, x, y
;; smull2 dst, x, y
;; addp dst, tmp, dst
(rule (lower (has_type $I32X4 (widening_pairwise_dot_product_s x y)))
(let ((tmp Reg (vec_rrr_long (VecRRRLongOp.Smull16) x y $false))
(dst Reg (vec_rrr_long (VecRRRLongOp.Smull16) x y $true)))
(vec_rrr (VecALUOp.Addp) tmp dst (VectorSize.Size32x4))))

;;;; Rules for `Fence` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (fence))
Expand Down
20 changes: 20 additions & 0 deletions cranelift/codegen/src/isa/aarch64/lower_dynamic_neon.isle
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,23 @@
;;; Rules for `extract_vector` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(rule (lower (extract_vector x 0))
(value_reg (fpu_move_128 (put_in_reg x))))

;;;; Rules for `swiden_low` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type ty (swiden_low x)))
(value_reg (vec_extend (VecExtendOp.Sxtl) x $false (lane_size ty))))

;;;; Rules for `swiden_high` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type ty (swiden_high x)))
(value_reg (vec_extend (VecExtendOp.Sxtl) x $true (lane_size ty))))

;;;; Rules for `uwiden_low` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type ty (uwiden_low x)))
(value_reg (vec_extend (VecExtendOp.Uxtl) x $false (lane_size ty))))

;;;; Rules for `uwiden_high` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (has_type ty (uwiden_high x)))
(value_reg (vec_extend (VecExtendOp.Uxtl) x $true (lane_size ty))))
Loading

0 comments on commit 3b68d76

Please sign in to comment.