Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cranelift: Port vselect over to ISLE on x64 #3659

Merged
merged 1 commit into from
Jan 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions cranelift/codegen/src/isa/x64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -954,6 +954,28 @@
(rule (pandn src1 src2)
(xmm_rm_r $F64X2 (SseOpcode.Pandn) src1 src2))

;; Select the SSE variable-blend opcode to use for a vector of type `ty`:
;; `blendvps` for `$F32X4`, `blendvpd` for `$F64X2`, and `pblendvb` for any
;; other type accepted by the `multi_lane` extractor.
;;
;; NOTE(review): `$F32X4` and `$F64X2` presumably also match `multi_lane`, so
;; this relies on the two specific rules being preferred over the catch-all
;; one -- confirm against ISLE's rule-priority semantics.
(decl sse_blend_op (Type) SseOpcode)
(rule (sse_blend_op $F32X4) (SseOpcode.Blendvps))
(rule (sse_blend_op $F64X2) (SseOpcode.Blendvpd))
(rule (sse_blend_op (multi_lane _bits _lanes)) (SseOpcode.Pblendvb))

;; Select the SSE register-move opcode to use for a vector of type `ty`:
;; `movaps` for `$F32X4`, `movapd` for `$F64X2`, and `movdqa` for any other
;; type accepted by the `multi_lane` extractor.
;;
;; NOTE(review): as with `sse_blend_op`, the catch-all `multi_lane` rule
;; presumably overlaps the two specific rules; confirm that the specific rules
;; take precedence.
(decl sse_mov_op (Type) SseOpcode)
(rule (sse_mov_op $F32X4) (SseOpcode.Movaps))
(rule (sse_mov_op $F64X2) (SseOpcode.Movapd))
(rule (sse_mov_op (multi_lane _bits _lanes)) (SseOpcode.Movdqa))

;; Helper for creating `blendvp{d,s}` and `pblendvb` instructions.
;;
;; Arguments are `(ty mask src1 src2)`. Both the blend opcode and the opcode
;; of the move that sets up the mask are chosen from `ty`, via `sse_blend_op`
;; and `sse_mov_op`. `mask` is first copied into `xmm0`; the blend itself is
;; then built with `xmm_rm_r`, and the `Reg` it produces is the result.
(decl sse_blend (Type RegMem RegMem Reg) Reg)
(rule (sse_blend ty mask src1 src2)
;; Move the mask into `xmm0`, as blend instructions implicitly operate on
;; that register. (This kind of thing would normally happen inside of
;; `Inst::mov_mitosis`, but has to happen here, where we still have the
;; mask register, because the mask is implicit and doesn't appear in the
;; `Inst` itself.)
(let ((mask2 WritableReg (xmm0))
;; The `Unit` value returned by `emit` is unused; it is bound to `_` purely
;; for the side effect of emitting the move into `xmm0`.
(_ Unit (emit (MInst.XmmUnaryRmR (sse_mov_op ty) mask mask2))))
;; Note the operand order: `src2` (the `Reg`) is passed first and `src1`
;; (the `RegMem`) second. `mask2` is deliberately not passed along, since
;; the blend reads the mask from `xmm0` implicitly.
(xmm_rm_r ty (sse_blend_op ty) src2 src1)))

;; Helper for creating `blendvpd` instructions.
(decl blendvpd (Reg RegMem Reg) Reg)
(rule (blendvpd src1 src2 mask)
Expand Down
9 changes: 9 additions & 0 deletions cranelift/codegen/src/isa/x64/lower.isle
Original file line number Diff line number Diff line change
Expand Up @@ -1050,6 +1050,15 @@
(b Reg (sse_and_not ty cond_reg (put_in_reg_mem if_false))))
(value_reg (sse_or ty b (RegMem.Reg a)))))

;;;; Rules for `vselect` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; Lower `vselect` for any result type accepted by `multi_lane` by delegating
;; to `sse_blend`, with `condition` as the blend mask and `if_true` /
;; `if_false` as the two sources.
;;
;; This relies on the bit representation of the condition: vector boolean
;; types are defined in Cranelift to be all 1s or all 0s, while x86's variable
;; blend instructions look only at the high bit of each mask element. Since
;; all bits of a vector boolean lane match, the high bit is sufficient.
;;
;; NOTE(review): the Rust lowering this replaces asserted that the condition's
;; lane type is boolean; no equivalent check is visible in this rule --
;; confirm that it is enforced elsewhere (e.g. by the CLIF verifier).
(rule (lower (has_type ty @ (multi_lane _bits _lanes)
(vselect condition if_true if_false)))
(value_reg (sse_blend ty
(put_in_reg_mem condition)
(put_in_reg_mem if_true)
(put_in_reg if_false))))

;;;; Rules for `insertlane` ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

(rule (lower (insertlane vec @ (value_type ty) val (u8_from_uimm8 idx)))
Expand Down
65 changes: 5 additions & 60 deletions cranelift/codegen/src/isa/x64/lower.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1515,6 +1515,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
match op {
Opcode::Iconst
| Opcode::Bconst
| Opcode::F32const
| Opcode::F64const
| Opcode::Null
| Opcode::Iadd
| Opcode::IaddIfcout
Expand All @@ -1535,50 +1537,8 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
| Opcode::Imin
| Opcode::Umin
| Opcode::Bnot
| Opcode::Bitselect => implemented_in_isle(ctx),

Opcode::Vselect => {
let ty = ty.unwrap();
let condition = put_input_in_reg(ctx, inputs[0]);
let condition_ty = ctx.input_ty(insn, 0);
let if_true = input_to_reg_mem(ctx, inputs[1]);
let if_false = put_input_in_reg(ctx, inputs[2]);
let dst = get_output_reg(ctx, outputs[0]).only_reg().unwrap();

if ty.is_vector() {
// `vselect` relies on the bit representation of the condition:
// vector boolean types are defined in Cranelift to be all 1s or
// all 0s. This lowering relies on that fact to use x86's
// variable blend instructions, which look at the _high_bit_ of
// the condition mask. All the bits of vector booleans will
// match (all 1s or all 0s), so we can just use the high bit.
assert!(condition_ty.lane_type().is_bool());

// Variable blend instructions expect the condition mask to be
// in XMM0.
let xmm0 = Writable::from_reg(regs::xmm0());
ctx.emit(Inst::gen_move(xmm0, condition, ty));

// Match up the source and destination registers for regalloc.
ctx.emit(Inst::gen_move(dst, if_false, ty));

// Technically PBLENDVB would work in all cases (since the bytes
// inside the mask will be all 1s or 0s we can blend
// byte-by-byte instead of word-by-word, e.g.) but
// type-specialized versions are included here for clarity when
// troubleshooting and due to slight improvements in
// latency/throughput on certain processor families.
let opcode = match condition_ty {
types::B64X2 => SseOpcode::Blendvpd,
types::B32X4 => SseOpcode::Blendvps,
types::B16X8 | types::B8X16 => SseOpcode::Pblendvb,
_ => unimplemented!("unable lower vselect for type: {}", condition_ty),
};
ctx.emit(Inst::xmm_rm_r(opcode, if_true, dst));
} else {
unimplemented!("no lowering for scalar vselect instruction")
}
}
| Opcode::Bitselect
| Opcode::Vselect => implemented_in_isle(ctx),

Opcode::Ishl | Opcode::Ushr | Opcode::Sshr | Opcode::Rotl | Opcode::Rotr => {
let dst_ty = ctx.output_ty(insn, 0);
Expand Down Expand Up @@ -3254,22 +3214,6 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
};
}

Opcode::F64const => {
unreachable!(
"implemented in ISLE: inst = `{}`, type = `{:?}`",
ctx.dfg().display_inst(insn),
ty
);
}

Opcode::F32const => {
unreachable!(
"implemented in ISLE: inst = `{}`, type = `{:?}`",
ctx.dfg().display_inst(insn),
ty
);
}

Opcode::WideningPairwiseDotProductS => {
let lhs = put_input_in_reg(ctx, inputs[0]);
let rhs = input_to_reg_mem(ctx, inputs[1]);
Expand Down Expand Up @@ -5927,6 +5871,7 @@ fn lower_insn_to_regs<C: LowerCtx<I = Inst>>(
println!("Did not match fcvt input!");
}
}

// Unimplemented opcodes below. These are not currently used by Wasm
// lowering or other known embeddings, but should be either supported or
// removed eventually.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
src/clif.isle f176ef3bba99365
src/prelude.isle babc931e5dc5b4cf
src/isa/x64/inst.isle fb5d3ac8e68c46d2
src/isa/x64/lower.isle 5d66b88a371d4d70
src/isa/x64/inst.isle bc5fc626492752c8
src/isa/x64/lower.isle 33e94300f4c08455
Loading