Merge pull request #3679 from FreddieLiardet/fp_const_fmov

Improve code generation for floating-point constants
bytecodealliance · Jan 19, 2022 · 4a331b8 · 4a331b8
2 parents 2649d23 + b553158
commit 4a331b8
Show file tree

Hide file tree

Showing 10 changed files with 490 additions and 309 deletions.
diff --git a/cranelift/codegen/src/isa/aarch64/inst.isle b/cranelift/codegen/src/isa/aarch64/inst.isle
@@ -454,6 +454,12 @@
       (rn Reg)
       (size ScalarSize))
 
+    ;; Loads a floating-point immediate.
+    (FpuMoveFPImm
+      (rd WritableReg)
+      (imm ASIMDFPModImm)
+      (size ScalarSize))
+
     ;; Move to a vector element from a GPR.
     (MovToVec
       (rd WritableReg)

diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit.rs b/cranelift/codegen/src/isa/aarch64/inst/emit.rs
@@ -1983,6 +1983,19 @@ impl MachInstEmit for Inst {
                 };
                 sink.put4(template | (machreg_to_gpr(rn) << 5) | machreg_to_vec(rd.to_reg()));
             }
+            &Inst::FpuMoveFPImm { rd, imm, size } => {
+                let size_code = match size {
+                    ScalarSize::Size32 => 0b00,
+                    ScalarSize::Size64 => 0b01,
+                    _ => unimplemented!(),
+                };
+                sink.put4(
+                    0b000_11110_00_1_00_000_000100_00000_00000
+                        | size_code << 22
+                        | ((imm.enc_bits() as u32) << 13)
+                        | machreg_to_vec(rd.to_reg()),
+                );
+            }
             &Inst::MovToVec { rd, rn, idx, size } => {
                 let (imm5, shift) = match size.lane_size() {
                     ScalarSize::Size8 => (0b00001, 1),

diff --git a/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs b/cranelift/codegen/src/isa/aarch64/inst/emit_tests.rs
@@ -2051,6 +2051,25 @@ fn test_aarch64_binemit() {
         "8103271E",
         "fmov s1, w28",
     ));
+    insns.push((
+        Inst::FpuMoveFPImm {
+            rd: writable_vreg(31),
+            imm: ASIMDFPModImm::maybe_from_u64(f64::to_bits(1.0), ScalarSize::Size64).unwrap(),
+            size: ScalarSize::Size64,
+        },
+        "1F106E1E",
+        "fmov d31, #1",
+    ));
+    insns.push((
+        Inst::FpuMoveFPImm {
+            rd: writable_vreg(1),
+            imm: ASIMDFPModImm::maybe_from_u64(f32::to_bits(31.0).into(), ScalarSize::Size32)
+                .unwrap(),
+            size: ScalarSize::Size32,
+        },
+        "01F0271E",
+        "fmov s1, #31",
+    ));
     insns.push((
         Inst::MovToVec {
             rd: writable_vreg(0),

diff --git a/cranelift/codegen/src/isa/aarch64/inst/mod.rs b/cranelift/codegen/src/isa/aarch64/inst/mod.rs
@@ -239,29 +239,35 @@ impl Inst {
     /// Create instructions that load a 32-bit floating-point constant.
     pub fn load_fp_constant32<F: FnMut(Type) -> Writable<Reg>>(
         rd: Writable<Reg>,
-        value: u32,
+        const_data: u32,
         mut alloc_tmp: F,
     ) -> SmallVec<[Inst; 4]> {
         // Note that we must make sure that all bits outside the lowest 32 are set to 0
         // because this function is also used to load wider constants (that have zeros
         // in their most significant bits).
-        if value == 0 {
+        if const_data == 0 {
             smallvec![Inst::VecDupImm {
                 rd,
                 imm: ASIMDMovModImm::zero(ScalarSize::Size32),
                 invert: false,
-                size: VectorSize::Size32x2
+                size: VectorSize::Size32x2,
+            }]
+        } else if let Some(imm) =
+            ASIMDFPModImm::maybe_from_u64(const_data.into(), ScalarSize::Size32)
+        {
+            smallvec![Inst::FpuMoveFPImm {
+                rd,
+                imm,
+                size: ScalarSize::Size32,
             }]
         } else {
-            // TODO: use FMOV immediate form when `value` has sufficiently few mantissa/exponent
-            // bits.
             let tmp = alloc_tmp(I32);
-            let mut insts = Inst::load_constant(tmp, value as u64);
+            let mut insts = Inst::load_constant(tmp, const_data as u64);
 
             insts.push(Inst::MovToFpu {
                 rd,
                 rn: tmp.to_reg(),
-                size: ScalarSize::Size64,
+                size: ScalarSize::Size32,
             });
 
             insts
@@ -277,11 +283,23 @@ impl Inst {
         // Note that we must make sure that all bits outside the lowest 64 are set to 0
         // because this function is also used to load wider constants (that have zeros
         // in their most significant bits).
-        if let Ok(const_data) = u32::try_from(const_data) {
+        // TODO: Treat as half of a 128-bit vector and consider replicated patterns.
+        // Scalar MOVI might also be an option.
+        if const_data == 0 {
+            smallvec![Inst::VecDupImm {
+                rd,
+                imm: ASIMDMovModImm::zero(ScalarSize::Size32),
+                invert: false,
+                size: VectorSize::Size32x2,
+            }]
+        } else if let Some(imm) = ASIMDFPModImm::maybe_from_u64(const_data, ScalarSize::Size64) {
+            smallvec![Inst::FpuMoveFPImm {
+                rd,
+                imm,
+                size: ScalarSize::Size64,
+            }]
+        } else if let Ok(const_data) = u32::try_from(const_data) {
             Inst::load_fp_constant32(rd, const_data, alloc_tmp)
-        // TODO: use FMOV immediate form when `const_data` has sufficiently few mantissa/exponent
-        // bits.  Also, treat it as half of a 128-bit vector and consider replicated
-        // patterns. Scalar MOVI might also be an option.
         } else if const_data & (u32::MAX as u64) == 0 {
             let tmp = alloc_tmp(I64);
             let mut insts = Inst::load_constant(tmp, const_data);
@@ -879,6 +897,9 @@ fn aarch64_get_regs(inst: &Inst, collector: &mut RegUsageCollector) {
             collector.add_def(rd);
             collector.add_use(rn);
         }
+        &Inst::FpuMoveFPImm { rd, .. } => {
+            collector.add_def(rd);
+        }
         &Inst::MovToVec { rd, rn, .. } => {
             collector.add_mod(rd);
             collector.add_use(rn);
@@ -1654,6 +1675,9 @@ pub fn aarch64_map_regs<RM: RegMapper>(inst: &mut Inst, mapper: &RM) {
             mapper.map_def(rd);
             mapper.map_use(rn);
         }
+        &mut Inst::FpuMoveFPImm { ref mut rd, .. } => {
+            mapper.map_def(rd);
+        }
         &mut Inst::MovToVec {
             ref mut rd,
             ref mut rn,
@@ -2693,6 +2717,12 @@ impl Inst {
                 let rn = show_ireg_sized(rn, mb_rru, operand_size);
                 format!("fmov {}, {}", rd, rn)
             }
+            &Inst::FpuMoveFPImm { rd, imm, size } => {
+                let imm = imm.show_rru(mb_rru);
+                let rd = show_vreg_scalar(rd.to_reg(), mb_rru, size);
+
+                format!("fmov {}, {}", rd, imm)
+            }
             &Inst::MovToVec { rd, rn, idx, size } => {
                 let rd = show_vreg_element(rd.to_reg(), mb_rru, idx, size);
                 let rn = show_ireg_sized(rn, mb_rru, size.operand_size());

diff --git a/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest b/cranelift/codegen/src/isa/aarch64/lower/isle/generated_code.manifest
@@ -1,4 +1,4 @@
 src/clif.isle f176ef3bba99365
 src/prelude.isle 22dd5ff133398960
-src/isa/aarch64/inst.isle 5fa80451697b084f
+src/isa/aarch64/inst.isle f946561093de4ff5
 src/isa/aarch64/lower.isle 2d2e1e076a0c8a23