Skip to content

Commit

Permalink
Merge pull request #3679 from FreddieLiardet/fp_const_fmov
Browse files Browse the repository at this point in the history
Improve code generation for floating-point constants
  • Loading branch information
cfallin authored Jan 19, 2022
2 parents 2649d23 + b553158 commit 4a331b8
Show file tree
Hide file tree
Showing 10 changed files with 490 additions and 309 deletions.
6 changes: 6 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst.isle
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,12 @@
(rn Reg)
(size ScalarSize))

;; Loads a floating-point immediate.
(FpuMoveFPImm
(rd WritableReg)
(imm ASIMDFPModImm)
(size ScalarSize))

;; Move to a vector element from a GPR.
(MovToVec
(rd WritableReg)
Expand Down
13 changes: 13 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst/emit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1983,6 +1983,19 @@ impl MachInstEmit for Inst {
};
sink.put4(template | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg()));
}
&Inst::FpuMoveFPImm { rd, imm, size } => {
let size_code = match size {
ScalarSize::Size32 => 0b00,
ScalarSize::Size64 => 0b01,
_ => unimplemented!(),
};
sink.put4(
0b000_11110_00_1_00_000_000100_00000_00000
| size_code << 22
| ((imm.enc_bits() as u32) << 13)
| machreg_to_vec(rd.to_reg()),
);
}
&Inst::MovToVec { rd, rn, idx, size } => {
let (imm5, shift) = match size.lane_size() {
ScalarSize::Size8 => (0b00001, 1),
Expand Down
19 changes: 19 additions & 0 deletions cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2051,6 +2051,25 @@ fn test_aarch64_binemit() {
"8103271E",
"fmov s1, w28",
));
insns.push((
Inst::FpuMoveFPImm {
rd: writable_vreg(31),
imm: ASIMDFPModImm::maybe_from_u64(f64::to_bits(1.0), ScalarSize::Size64).unwrap(),
size: ScalarSize::Size64,
},
"1F106E1E",
"fmov d31, #1",
));
insns.push((
Inst::FpuMoveFPImm {
rd: writable_vreg(1),
imm: ASIMDFPModImm::maybe_from_u64(f32::to_bits(31.0).into(), ScalarSize::Size32)
.unwrap(),
size: ScalarSize::Size32,
},
"01F0271E",
"fmov s1, #31",
));
insns.push((
Inst::MovToVec {
rd: writable_vreg(0),
Expand Down
52 changes: 41 additions & 11 deletions cranelift/codegen/src/isa/aarch64/inst/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -239,29 +239,35 @@ impl Inst {
/// Create instructions that load a 32-bit floating-point constant.
pub fn load_fp_constant32<F: FnMut(Type) -> Writable<Reg>>(
rd: Writable<Reg>,
value: u32,
const_data: u32,
mut alloc_tmp: F,
) -> SmallVec<[Inst; 4]> {
// Note that we must make sure that all bits outside the lowest 32 are set to 0
// because this function is also used to load wider constants (that have zeros
// in their most significant bits).
if value == 0 {
if const_data == 0 {
smallvec![Inst::VecDupImm {
rd,
imm: ASIMDMovModImm::zero(ScalarSize::Size32),
invert: false,
size: VectorSize::Size32x2
size: VectorSize::Size32x2,
}]
} else if let Some(imm) =
ASIMDFPModImm::maybe_from_u64(const_data.into(), ScalarSize::Size32)
{
smallvec![Inst::FpuMoveFPImm {
rd,
imm,
size: ScalarSize::Size32,
}]
} else {
// TODO: use FMOV immediate form when `value` has sufficiently few mantissa/exponent
// bits.
let tmp = alloc_tmp(I32);
let mut insts = Inst::load_constant(tmp, value as u64);
let mut insts = Inst::load_constant(tmp, const_data as u64);

insts.push(Inst::MovToFpu {
rd,
rn: tmp.to_reg(),
size: ScalarSize::Size64,
size: ScalarSize::Size32,
});

insts
Expand All @@ -277,11 +283,23 @@ impl Inst {
// Note that we must make sure that all bits outside the lowest 64 are set to 0
// because this function is also used to load wider constants (that have zeros
// in their most significant bits).
if let Ok(const_data) = u32::try_from(const_data) {
// TODO: Treat as half of a 128 bit vector and consider replicated patterns.
// Scalar MOVI might also be an option.
if const_data == 0 {
smallvec![Inst::VecDupImm {
rd,
imm: ASIMDMovModImm::zero(ScalarSize::Size32),
invert: false,
size: VectorSize::Size32x2,
}]
} else if let Some(imm) = ASIMDFPModImm::maybe_from_u64(const_data, ScalarSize::Size64) {
smallvec![Inst::FpuMoveFPImm {
rd,
imm,
size: ScalarSize::Size64,
}]
} else if let Ok(const_data) = u32::try_from(const_data) {
Inst::load_fp_constant32(rd, const_data, alloc_tmp)
// TODO: use FMOV immediate form when `const_data` has sufficiently few mantissa/exponent
// bits. Also, treat it as half of a 128-bit vector and consider replicated
// patterns. Scalar MOVI might also be an option.
} else if const_data & (u32::MAX as u64) == 0 {
let tmp = alloc_tmp(I64);
let mut insts = Inst::load_constant(tmp, const_data);
Expand Down Expand Up @@ -879,6 +897,9 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
collector.add_def(rd);
collector.add_use(rn);
}
&Inst::FpuMoveFPImm { rd, .. } => {
collector.add_def(rd);
}
&Inst::MovToVec { rd, rn, .. } => {
collector.add_mod(rd);
collector.add_use(rn);
Expand Down Expand Up @@ -1654,6 +1675,9 @@ pub fn aarch64_map_regs<RM: RegMapper>(inst: &mut Inst, mapper: &RM) {
mapper.map_def(rd);
mapper.map_use(rn);
}
&mut Inst::FpuMoveFPImm { ref mut rd, .. } => {
mapper.map_def(rd);
}
&mut Inst::MovToVec {
ref mut rd,
ref mut rn,
Expand Down Expand Up @@ -2693,6 +2717,12 @@ impl Inst {
let rn = show_ireg_sized(rn, mb_rru, operand_size);
format!("fmov {}, {}", rd, rn)
}
&Inst::FpuMoveFPImm { rd, imm, size } => {
let imm = imm.show_rru(mb_rru);
let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);

format!("fmov {}, {}", rd, imm)
}
&Inst::MovToVec { rd, rn, idx, size } => {
let rd = show_vreg_element(rd.to_reg(), mb_rru, idx, size);
let rn = show_ireg_sized(rn, mb_rru, size.operand_size());
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
src/clif.isle f176ef3bba99365
src/prelude.isle 22dd5ff133398960
src/isa/aarch64/inst.isle 5fa80451697b084f
src/isa/aarch64/inst.isle f946561093de4ff5
src/isa/aarch64/lower.isle 2d2e1e076a0c8a23
Loading

0 comments on commit 4a331b8

Please sign in to comment.