Move some functions from the 'float' implementation into a separate file

This is in preparation for an s2f implementation. No functional change.
ulfjack · May 25, 2020 · f7ca563 · f7ca563
1 parent 27d3c55
commit f7ca563
Show file tree

Hide file tree

Showing 2 changed files with 123 additions and 99 deletions.
diff --git a/ryu/f2s.c b/ryu/f2s.c
@@ -32,111 +32,13 @@
 #endif
 
 #include "ryu/common.h"
+#include "ryu/f2s_intrinsics.h"
 #include "ryu/digit_table.h"
 
-#if defined(RYU_FLOAT_FULL_TABLE)
-
-#include "ryu/f2s_full_table.h"
-
-#else
-
-#if defined(RYU_OPTIMIZE_SIZE)
-#include "ryu/d2s_small_table.h"
-#else
-#include "ryu/d2s_full_table.h"
-#endif
-#define FLOAT_POW5_INV_BITCOUNT (DOUBLE_POW5_INV_BITCOUNT - 64)
-#define FLOAT_POW5_BITCOUNT (DOUBLE_POW5_BITCOUNT - 64)
-
-#endif
-
 #define FLOAT_MANTISSA_BITS 23
 #define FLOAT_EXPONENT_BITS 8
 #define FLOAT_BIAS 127
 
-static inline uint32_t pow5factor_32(uint32_t value) {
-  uint32_t count = 0;
-  for (;;) {
-    assert(value != 0);
-    const uint32_t q = value / 5;
-    const uint32_t r = value % 5;
-    if (r != 0) {
-      break;
-    }
-    value = q;
-    ++count;
-  }
-  return count;
-}
-
-// Returns true if value is divisible by 5^p.
-static inline bool multipleOfPowerOf5_32(const uint32_t value, const uint32_t p) {
-  return pow5factor_32(value) >= p;
-}
-
-// Returns true if value is divisible by 2^p.
-static inline bool multipleOfPowerOf2_32(const uint32_t value, const uint32_t p) {
-  // __builtin_ctz doesn't appear to be faster here.
-  return (value & ((1u << p) - 1)) == 0;
-}
-
-// It seems to be slightly faster to avoid uint128_t here, although the
-// generated code for uint128_t looks slightly nicer.
-static inline uint32_t mulShift32(const uint32_t m, const uint64_t factor, const int32_t shift) {
-  assert(shift > 32);
-
-  // The casts here help MSVC to avoid calls to the __allmul library
-  // function.
-  const uint32_t factorLo = (uint32_t)(factor);
-  const uint32_t factorHi = (uint32_t)(factor >> 32);
-  const uint64_t bits0 = (uint64_t)m * factorLo;
-  const uint64_t bits1 = (uint64_t)m * factorHi;
-
-#ifdef RYU_32_BIT_PLATFORM
-  // On 32-bit platforms we can avoid a 64-bit shift-right since we only
-  // need the upper 32 bits of the result and the shift value is > 32.
-  const uint32_t bits0Hi = (uint32_t)(bits0 >> 32);
-  uint32_t bits1Lo = (uint32_t)(bits1);
-  uint32_t bits1Hi = (uint32_t)(bits1 >> 32);
-  bits1Lo += bits0Hi;
-  bits1Hi += (bits1Lo < bits0Hi);
-  const int32_t s = shift - 32;
-  return (bits1Hi << (32 - s)) | (bits1Lo >> s);
-#else // RYU_32_BIT_PLATFORM
-  const uint64_t sum = (bits0 >> 32) + bits1;
-  const uint64_t shiftedSum = sum >> (shift - 32);
-  assert(shiftedSum <= UINT32_MAX);
-  return (uint32_t) shiftedSum;
-#endif // RYU_32_BIT_PLATFORM
-}
-
-static inline uint32_t mulPow5InvDivPow2(const uint32_t m, const uint32_t q, const int32_t j) {
-#if defined(RYU_FLOAT_FULL_TABLE)
-  return mulShift32(m, FLOAT_POW5_INV_SPLIT[q], j);
-#elif defined(RYU_OPTIMIZE_SIZE)
-  // The inverse multipliers are defined as [2^x / 5^y] + 1; the upper 64 bits from the double lookup
-  // table are the correct bits for [2^x / 5^y], so we have to add 1 here. Note that we rely on the
-  // fact that the added 1 that's already stored in the table never overflows into the upper 64 bits.
-  uint64_t pow5[2];
-  double_computeInvPow5(q, pow5);
-  return mulShift32(m, pow5[1] + 1, j);
-#else
-  return mulShift32(m, DOUBLE_POW5_INV_SPLIT[q][1] + 1, j);
-#endif
-}
-
-static inline uint32_t mulPow5divPow2(const uint32_t m, const uint32_t i, const int32_t j) {
-#if defined(RYU_FLOAT_FULL_TABLE)
-  return mulShift32(m, FLOAT_POW5_SPLIT[i], j);
-#elif defined(RYU_OPTIMIZE_SIZE)
-  uint64_t pow5[2];
-  double_computePow5(i, pow5);
-  return mulShift32(m, pow5[1], j);
-#else
-  return mulShift32(m, DOUBLE_POW5_SPLIT[i][1], j);
-#endif
-}
-
 // A floating decimal representing m * 10^e.
 typedef struct floating_decimal_32 {
   uint32_t mantissa;

diff --git a/ryu/f2s_intrinsics.h b/ryu/f2s_intrinsics.h
@@ -0,0 +1,122 @@
+// Copyright 2018 Ulf Adams
+//
+// The contents of this file may be used under the terms of the Apache License,
+// Version 2.0.
+//
+//    (See accompanying file LICENSE-Apache or copy at
+//     http://www.apache.org/licenses/LICENSE-2.0)
+//
+// Alternatively, the contents of this file may be used under the terms of
+// the Boost Software License, Version 1.0.
+//    (See accompanying file LICENSE-Boost or copy at
+//     https://www.boost.org/LICENSE_1_0.txt)
+//
+// Unless required by applicable law or agreed to in writing, this software
+// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.
+#ifndef RYU_F2S_INTRINSICS_H
+#define RYU_F2S_INTRINSICS_H
+
+// Defines RYU_32_BIT_PLATFORM if applicable.
+#include "ryu/common.h"
+
+#if defined(RYU_FLOAT_FULL_TABLE)
+
+#include "ryu/f2s_full_table.h"
+
+#else
+
+#if defined(RYU_OPTIMIZE_SIZE)
+#include "ryu/d2s_small_table.h"
+#else
+#include "ryu/d2s_full_table.h"
+#endif
+#define FLOAT_POW5_INV_BITCOUNT (DOUBLE_POW5_INV_BITCOUNT - 64)
+#define FLOAT_POW5_BITCOUNT (DOUBLE_POW5_BITCOUNT - 64)
+
+#endif
+
+static inline uint32_t pow5factor_32(uint32_t value) { // Returns how many times 5 divides value evenly; value must be nonzero.
+  uint32_t count = 0;
+  for (;;) {
+    assert(value != 0); // If asserts are compiled out, value == 0 never leaves this loop (0 % 5 == 0 and 0 / 5 == 0).
+    const uint32_t q = value / 5;
+    const uint32_t r = value % 5;
+    if (r != 0) { // value is not a multiple of 5 any more, so count is final.
+      break;
+    }
+    value = q; // Strip one factor of 5 and test the quotient next.
+    ++count;
+  }
+  return count;
+}
+
+// Returns true if value is divisible by 5^p. value must be nonzero, because pow5factor_32 asserts value != 0.
+static inline bool multipleOfPowerOf5_32(const uint32_t value, const uint32_t p) {
+  return pow5factor_32(value) >= p; // Divisible by 5^p exactly when at least p factors of 5 can be stripped.
+}
+
+// Returns true if value is divisible by 2^p. p must be smaller than the bit width of unsigned int, otherwise 1u << p is undefined.
+static inline bool multipleOfPowerOf2_32(const uint32_t value, const uint32_t p) {
+  // __builtin_ctz doesn't appear to be faster here.
+  return (value & ((1u << p) - 1)) == 0; // (1u << p) - 1 masks the low p bits; they must all be zero.
+}
+
+// It seems to be slightly faster to avoid uint128_t here, although the
+// generated code for uint128_t looks slightly nicer.
+static inline uint32_t mulShift32(const uint32_t m, const uint64_t factor, const int32_t shift) { // Returns the low 32 bits of (m * factor) >> shift.
+  assert(shift > 32);
+
+  // The casts here help MSVC to avoid calls to the __allmul library
+  // function.
+  const uint32_t factorLo = (uint32_t)(factor);
+  const uint32_t factorHi = (uint32_t)(factor >> 32);
+  const uint64_t bits0 = (uint64_t)m * factorLo; // m times the low 32 bits of factor.
+  const uint64_t bits1 = (uint64_t)m * factorHi; // m times the high 32 bits of factor, i.e. weighted by 2^32 in the full product.
+
+#ifdef RYU_32_BIT_PLATFORM
+  // On 32-bit platforms we can avoid a 64-bit shift-right since we only
+  // need the upper 32 bits of the result and the shift value is > 32.
+  const uint32_t bits0Hi = (uint32_t)(bits0 >> 32);
+  uint32_t bits1Lo = (uint32_t)(bits1);
+  uint32_t bits1Hi = (uint32_t)(bits1 >> 32);
+  bits1Lo += bits0Hi; // Add the carry-in from the low partial product; may wrap.
+  bits1Hi += (bits1Lo < bits0Hi); // Wrap-around in the previous addition means a carry into the high word.
+  const int32_t s = shift - 32;
+  return (bits1Hi << (32 - s)) | (bits1Lo >> s); // Both shift counts are only in range for 32 < shift < 64.
+#else // RYU_32_BIT_PLATFORM
+  const uint64_t sum = (bits0 >> 32) + bits1; // Equals (m * factor) >> 32; the low 32 bits of bits0 cannot affect the result.
+  const uint64_t shiftedSum = sum >> (shift - 32);
+  assert(shiftedSum <= UINT32_MAX); // The cast below would otherwise silently truncate.
+  return (uint32_t) shiftedSum;
+#endif // RYU_32_BIT_PLATFORM
+}
+
+static inline uint32_t mulPow5InvDivPow2(const uint32_t m, const uint32_t q, const int32_t j) { // Returns mulShift32(m, F, j), where F is the inverse power-of-5 multiplier for index q.
+#if defined(RYU_FLOAT_FULL_TABLE)
+  return mulShift32(m, FLOAT_POW5_INV_SPLIT[q], j); // Dedicated float table: the entry is used as-is.
+#elif defined(RYU_OPTIMIZE_SIZE)
+  // The inverse multipliers are defined as [2^x / 5^y] + 1; the upper 64 bits from the double lookup
+  // table are the correct bits for [2^x / 5^y], so we have to add 1 here. Note that we rely on the
+  // fact that the added 1 that's already stored in the table never overflows into the upper 64 bits.
+  uint64_t pow5[2];
+  double_computeInvPow5(q, pow5); // Fills pow5; only the upper word pow5[1] is used below.
+  return mulShift32(m, pow5[1] + 1, j);
+#else
+  return mulShift32(m, DOUBLE_POW5_INV_SPLIT[q][1] + 1, j); // Upper word of the double table entry, with the same "+ 1" as the RYU_OPTIMIZE_SIZE branch.
+#endif
+}
+
+static inline uint32_t mulPow5divPow2(const uint32_t m, const uint32_t i, const int32_t j) { // Returns mulShift32(m, F, j), where F is the power-of-5 multiplier for index i.
+#if defined(RYU_FLOAT_FULL_TABLE)
+  return mulShift32(m, FLOAT_POW5_SPLIT[i], j); // Dedicated float table: the entry is used as-is.
+#elif defined(RYU_OPTIMIZE_SIZE)
+  uint64_t pow5[2];
+  double_computePow5(i, pow5); // Fills pow5; only the upper word pow5[1] is used below.
+  return mulShift32(m, pow5[1], j);
+#else
+  return mulShift32(m, DOUBLE_POW5_SPLIT[i][1], j); // Upper word of the double table entry; unlike the inverse variant, no "+ 1" is applied.
+#endif
+}
+
+#endif // RYU_F2S_INTRINSICS_H