Skip to content

Commit

Permalink
Move some functions from the 'float' implementation into a separate file
Browse files Browse the repository at this point in the history
This is in preparation for an s2f implementation.
No functional change.
  • Loading branch information
abolz authored and ulfjack committed May 25, 2020
1 parent 27d3c55 commit f7ca563
Show file tree
Hide file tree
Showing 2 changed files with 123 additions and 99 deletions.
100 changes: 1 addition & 99 deletions ryu/f2s.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,111 +32,13 @@
#endif

#include "ryu/common.h"
#include "ryu/f2s_intrinsics.h"
#include "ryu/digit_table.h"

#if defined(RYU_FLOAT_FULL_TABLE)

#include "ryu/f2s_full_table.h"

#else

#if defined(RYU_OPTIMIZE_SIZE)
#include "ryu/d2s_small_table.h"
#else
#include "ryu/d2s_full_table.h"
#endif
#define FLOAT_POW5_INV_BITCOUNT (DOUBLE_POW5_INV_BITCOUNT - 64)
#define FLOAT_POW5_BITCOUNT (DOUBLE_POW5_BITCOUNT - 64)

#endif

#define FLOAT_MANTISSA_BITS 23
#define FLOAT_EXPONENT_BITS 8
#define FLOAT_BIAS 127

// Counts how many times value can be divided evenly by 5, i.e. returns the
// largest e such that 5^e divides value.
// value must be non-zero; this is checked with an assert.
static inline uint32_t pow5factor_32(uint32_t value) {
  uint32_t exponent = 0;
  // A non-zero multiple of 5 stays non-zero after dividing by 5, so checking
  // the precondition once up front is sufficient.
  assert(value != 0);
  while (value % 5 == 0) {
    value /= 5;
    ++exponent;
  }
  return exponent;
}

// Reports whether 5^p divides value, by comparing p against the number of
// factors of 5 that pow5factor_32 finds in value.
static inline bool multipleOfPowerOf5_32(const uint32_t value, const uint32_t p) {
  const uint32_t fives = pow5factor_32(value);
  return p <= fives;
}

// Returns true if value is divisible by 2^p, i.e. if its low p bits are all
// zero.
// p must be less than 32: shifting a 32-bit operand by 32 or more bits is
// undefined behavior in C, so the precondition is asserted, in line with the
// other precondition checks in this file.
static inline bool multipleOfPowerOf2_32(const uint32_t value, const uint32_t p) {
  assert(p < 32);
  // __builtin_ctz doesn't appear to be faster here.
  const uint32_t lowBitsMask = (1u << p) - 1;
  return (value & lowBitsMask) == 0;
}

// Multiplies m by the 64-bit factor and shifts the product right by `shift`
// bits, returning the result as a 32-bit value. shift must be greater than 32
// (asserted); on the 64-bit path the shifted value is additionally asserted
// to fit in 32 bits.
//
// It seems to be slightly faster to avoid uint128_t here, although the
// generated code for uint128_t looks slightly nicer.
static inline uint32_t mulShift32(const uint32_t m, const uint64_t factor, const int32_t shift) {
  assert(shift > 32);

  // The casts here help MSVC to avoid calls to the __allmul library
  // function: each partial product is a 32x32 -> 64 bit multiplication.
  const uint64_t lowProduct = (uint64_t)m * (uint32_t)(factor);
  const uint64_t highProduct = (uint64_t)m * (uint32_t)(factor >> 32);

#ifdef RYU_32_BIT_PLATFORM
  // On 32-bit platforms we can avoid a 64-bit shift-right since we only
  // need the upper 32 bits of the result and the shift value is > 32.
  const uint32_t carryIn = (uint32_t)(lowProduct >> 32);
  // Add the high word of the low product into the high product, propagating
  // the carry by hand between the two 32-bit words.
  const uint32_t midWord = (uint32_t)(highProduct) + carryIn;
  const uint32_t topWord = (uint32_t)(highProduct >> 32) + (midWord < carryIn);
  const int32_t s = shift - 32;
  return (topWord << (32 - s)) | (midWord >> s);
#else // RYU_32_BIT_PLATFORM
  // The low 32 bits of the low product cannot reach the result because
  // shift > 32, so they are dropped before the two products are combined.
  const uint64_t shifted = ((lowProduct >> 32) + highProduct) >> (shift - 32);
  assert(shifted <= UINT32_MAX);
  return (uint32_t) shifted;
#endif // RYU_32_BIT_PLATFORM
}

// Looks up the inverse power-of-5 multiplier for index q from whichever table
// the build configuration provides, then returns mulShift32(m, multiplier, j).
static inline uint32_t mulPow5InvDivPow2(const uint32_t m, const uint32_t q, const int32_t j) {
#if defined(RYU_FLOAT_FULL_TABLE)
  const uint64_t multiplier = FLOAT_POW5_INV_SPLIT[q];
#elif defined(RYU_OPTIMIZE_SIZE)
  // The inverse multipliers are defined as [2^x / 5^y] + 1; the upper 64 bits from the double lookup
  // table are the correct bits for [2^x / 5^y], so we have to add 1 here. Note that we rely on the
  // fact that the added 1 that's already stored in the table never overflows into the upper 64 bits.
  uint64_t pow5[2];
  double_computeInvPow5(q, pow5);
  const uint64_t multiplier = pow5[1] + 1;
#else
  // Same adjustment as in the size-optimized case: take the upper 64 bits of
  // the double table entry and add 1.
  const uint64_t multiplier = DOUBLE_POW5_INV_SPLIT[q][1] + 1;
#endif
  return mulShift32(m, multiplier, j);
}

// Looks up the power-of-5 multiplier for index i from whichever table the
// build configuration provides, then returns mulShift32(m, multiplier, j).
static inline uint32_t mulPow5divPow2(const uint32_t m, const uint32_t i, const int32_t j) {
#if defined(RYU_FLOAT_FULL_TABLE)
  const uint64_t multiplier = FLOAT_POW5_SPLIT[i];
#elif defined(RYU_OPTIMIZE_SIZE)
  // Compute the double-precision entry on the fly and keep its upper 64 bits.
  uint64_t pow5[2];
  double_computePow5(i, pow5);
  const uint64_t multiplier = pow5[1];
#else
  // Use the upper 64 bits of the double-precision table entry.
  const uint64_t multiplier = DOUBLE_POW5_SPLIT[i][1];
#endif
  return mulShift32(m, multiplier, j);
}

// A floating decimal representing m * 10^e.
typedef struct floating_decimal_32 {
uint32_t mantissa;
Expand Down
122 changes: 122 additions & 0 deletions ryu/f2s_intrinsics.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
// Copyright 2018 Ulf Adams
//
// The contents of this file may be used under the terms of the Apache License,
// Version 2.0.
//
// (See accompanying file LICENSE-Apache or copy at
// http://www.apache.org/licenses/LICENSE-2.0)
//
// Alternatively, the contents of this file may be used under the terms of
// the Boost Software License, Version 1.0.
// (See accompanying file LICENSE-Boost or copy at
// https://www.boost.org/LICENSE_1_0.txt)
//
// Unless required by applicable law or agreed to in writing, this software
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.
#ifndef RYU_F2S_INTRINSICS_H
#define RYU_F2S_INTRINSICS_H

// Defines RYU_32_BIT_PLATFORM if applicable.
#include "ryu/common.h"

#if defined(RYU_FLOAT_FULL_TABLE)

#include "ryu/f2s_full_table.h"

#else

#if defined(RYU_OPTIMIZE_SIZE)
#include "ryu/d2s_small_table.h"
#else
#include "ryu/d2s_full_table.h"
#endif
#define FLOAT_POW5_INV_BITCOUNT (DOUBLE_POW5_INV_BITCOUNT - 64)
#define FLOAT_POW5_BITCOUNT (DOUBLE_POW5_BITCOUNT - 64)

#endif

// Counts how many times value can be divided evenly by 5, i.e. returns the
// largest e such that 5^e divides value.
// value must be non-zero; this is checked with an assert.
static inline uint32_t pow5factor_32(uint32_t value) {
  uint32_t exponent = 0;
  // A non-zero multiple of 5 stays non-zero after dividing by 5, so checking
  // the precondition once up front is sufficient.
  assert(value != 0);
  while (value % 5 == 0) {
    value /= 5;
    ++exponent;
  }
  return exponent;
}

// Reports whether 5^p divides value, by comparing p against the number of
// factors of 5 that pow5factor_32 finds in value.
static inline bool multipleOfPowerOf5_32(const uint32_t value, const uint32_t p) {
  const uint32_t fives = pow5factor_32(value);
  return p <= fives;
}

// Returns true if value is divisible by 2^p, i.e. if its low p bits are all
// zero.
// p must be less than 32: shifting a 32-bit operand by 32 or more bits is
// undefined behavior in C, so the precondition is asserted, in line with the
// other precondition checks in this file.
static inline bool multipleOfPowerOf2_32(const uint32_t value, const uint32_t p) {
  assert(p < 32);
  // __builtin_ctz doesn't appear to be faster here.
  const uint32_t lowBitsMask = (1u << p) - 1;
  return (value & lowBitsMask) == 0;
}

// Multiplies m by the 64-bit factor and shifts the product right by `shift`
// bits, returning the result as a 32-bit value. shift must be greater than 32
// (asserted); on the 64-bit path the shifted value is additionally asserted
// to fit in 32 bits.
//
// It seems to be slightly faster to avoid uint128_t here, although the
// generated code for uint128_t looks slightly nicer.
static inline uint32_t mulShift32(const uint32_t m, const uint64_t factor, const int32_t shift) {
  assert(shift > 32);

  // The casts here help MSVC to avoid calls to the __allmul library
  // function: each partial product is a 32x32 -> 64 bit multiplication.
  const uint64_t lowProduct = (uint64_t)m * (uint32_t)(factor);
  const uint64_t highProduct = (uint64_t)m * (uint32_t)(factor >> 32);

#ifdef RYU_32_BIT_PLATFORM
  // On 32-bit platforms we can avoid a 64-bit shift-right since we only
  // need the upper 32 bits of the result and the shift value is > 32.
  const uint32_t carryIn = (uint32_t)(lowProduct >> 32);
  // Add the high word of the low product into the high product, propagating
  // the carry by hand between the two 32-bit words.
  const uint32_t midWord = (uint32_t)(highProduct) + carryIn;
  const uint32_t topWord = (uint32_t)(highProduct >> 32) + (midWord < carryIn);
  const int32_t s = shift - 32;
  return (topWord << (32 - s)) | (midWord >> s);
#else // RYU_32_BIT_PLATFORM
  // The low 32 bits of the low product cannot reach the result because
  // shift > 32, so they are dropped before the two products are combined.
  const uint64_t shifted = ((lowProduct >> 32) + highProduct) >> (shift - 32);
  assert(shifted <= UINT32_MAX);
  return (uint32_t) shifted;
#endif // RYU_32_BIT_PLATFORM
}

// Looks up the inverse power-of-5 multiplier for index q from whichever table
// the build configuration provides, then returns mulShift32(m, multiplier, j).
static inline uint32_t mulPow5InvDivPow2(const uint32_t m, const uint32_t q, const int32_t j) {
#if defined(RYU_FLOAT_FULL_TABLE)
  const uint64_t multiplier = FLOAT_POW5_INV_SPLIT[q];
#elif defined(RYU_OPTIMIZE_SIZE)
  // The inverse multipliers are defined as [2^x / 5^y] + 1; the upper 64 bits from the double lookup
  // table are the correct bits for [2^x / 5^y], so we have to add 1 here. Note that we rely on the
  // fact that the added 1 that's already stored in the table never overflows into the upper 64 bits.
  uint64_t pow5[2];
  double_computeInvPow5(q, pow5);
  const uint64_t multiplier = pow5[1] + 1;
#else
  // Same adjustment as in the size-optimized case: take the upper 64 bits of
  // the double table entry and add 1.
  const uint64_t multiplier = DOUBLE_POW5_INV_SPLIT[q][1] + 1;
#endif
  return mulShift32(m, multiplier, j);
}

// Looks up the power-of-5 multiplier for index i from whichever table the
// build configuration provides, then returns mulShift32(m, multiplier, j).
static inline uint32_t mulPow5divPow2(const uint32_t m, const uint32_t i, const int32_t j) {
#if defined(RYU_FLOAT_FULL_TABLE)
  const uint64_t multiplier = FLOAT_POW5_SPLIT[i];
#elif defined(RYU_OPTIMIZE_SIZE)
  // Compute the double-precision entry on the fly and keep its upper 64 bits.
  uint64_t pow5[2];
  double_computePow5(i, pow5);
  const uint64_t multiplier = pow5[1];
#else
  // Use the upper 64 bits of the double-precision table entry.
  const uint64_t multiplier = DOUBLE_POW5_SPLIT[i][1];
#endif
  return mulShift32(m, multiplier, j);
}

#endif // RYU_F2S_INTRINSICS_H

0 comments on commit f7ca563

Please sign in to comment.