-
Notifications
You must be signed in to change notification settings - Fork 99
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Move some functions from the 'float' implementation into a separate file
This is in preparation for an s2f implementation. No functional change.
- Loading branch information
Showing
2 changed files
with
123 additions
and
99 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
// Copyright 2018 Ulf Adams | ||
// | ||
// The contents of this file may be used under the terms of the Apache License, | ||
// Version 2.0. | ||
// | ||
// (See accompanying file LICENSE-Apache or copy at | ||
// http://www.apache.org/licenses/LICENSE-2.0) | ||
// | ||
// Alternatively, the contents of this file may be used under the terms of | ||
// the Boost Software License, Version 1.0. | ||
// (See accompanying file LICENSE-Boost or copy at | ||
// https://www.boost.org/LICENSE_1_0.txt) | ||
// | ||
// Unless required by applicable law or agreed to in writing, this software | ||
// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
// KIND, either express or implied. | ||
#ifndef RYU_F2S_INTRINSICS_H | ||
#define RYU_F2S_INTRINSICS_H | ||
|
||
// Defines RYU_32_BIT_PLATFORM if applicable. | ||
#include "ryu/common.h" | ||
|
||
#if defined(RYU_FLOAT_FULL_TABLE) | ||
|
||
#include "ryu/f2s_full_table.h" | ||
|
||
#else | ||
|
||
#if defined(RYU_OPTIMIZE_SIZE) | ||
#include "ryu/d2s_small_table.h" | ||
#else | ||
#include "ryu/d2s_full_table.h" | ||
#endif | ||
#define FLOAT_POW5_INV_BITCOUNT (DOUBLE_POW5_INV_BITCOUNT - 64) | ||
#define FLOAT_POW5_BITCOUNT (DOUBLE_POW5_BITCOUNT - 64) | ||
|
||
#endif | ||
|
||
// Returns the exponent of the largest power of 5 that divides value, i.e. the
// number of times value can be divided by 5 without leaving a remainder.
//
// Precondition: value != 0. Zero is divisible by every power of 5, so the
// loop below would not terminate for it when asserts are compiled out.
static inline uint32_t pow5factor_32(uint32_t value) {
  // Checked once up front: dividing a non-zero multiple of 5 by 5 can never
  // yield zero, so the invariant holds for every later iteration as well.
  assert(value != 0);
  uint32_t count = 0;
  while (value % 5 == 0) {
    value /= 5;
    ++count;
  }
  return count;
}
|
||
// Returns true if value is divisible by 5^p.
//
// Delegates to pow5factor_32, so value must be non-zero (that helper asserts
// on a zero argument).
static inline bool multipleOfPowerOf5_32(const uint32_t value, const uint32_t p) {
  return pow5factor_32(value) >= p;
}
|
||
// Returns true if value is divisible by 2^p, i.e. if the low p bits of value
// are all zero.
static inline bool multipleOfPowerOf2_32(const uint32_t value, const uint32_t p) {
  // Shifting a 32-bit operand by 32 or more is undefined behavior. For such
  // p the only 32-bit value that is a multiple of 2^p is zero.
  if (p >= 32) {
    return value == 0;
  }
  // __builtin_ctz doesn't appear to be faster here.
  return (value & ((1u << p) - 1)) == 0;
}
|
||
// Returns floor(m * factor / 2^shift), narrowed to 32 bits.
//
// The 96-bit product is assembled from two 32x64-bit partial products; only
// the bits above the low 32 are kept, which is why shift must exceed 32.
// On the RYU_32_BIT_PLATFORM path the 32-bit shifts additionally require
// shift < 64, and bits that do not fit in 32 bits are silently dropped; the
// 64-bit path asserts that the result fits in 32 bits.
//
// It seems to be slightly faster to avoid uint128_t here, although the
// generated code for uint128_t looks slightly nicer.
static inline uint32_t mulShift32(const uint32_t m, const uint64_t factor, const int32_t shift) {
  assert(shift > 32);

  // The casts here help MSVC to avoid calls to the __allmul library
  // function.
  const uint32_t factorLo = (uint32_t)(factor);
  const uint32_t factorHi = (uint32_t)(factor >> 32);
  const uint64_t bits0 = (uint64_t)m * factorLo;  // m * low half of factor
  const uint64_t bits1 = (uint64_t)m * factorHi;  // m * high half, weighted by 2^32

#ifdef RYU_32_BIT_PLATFORM
  // On 32-bit platforms we can avoid a 64-bit shift-right since we only
  // need the upper 32 bits of the result and the shift value is > 32.
  const uint32_t bits0Hi = (uint32_t)(bits0 >> 32);
  uint32_t bits1Lo = (uint32_t)(bits1);
  uint32_t bits1Hi = (uint32_t)(bits1 >> 32);
  bits1Lo += bits0Hi;
  // Unsigned wrap-around in the addition above means a carry into the high word.
  bits1Hi += (bits1Lo < bits0Hi);
  const int32_t s = shift - 32;
  return (bits1Hi << (32 - s)) | (bits1Lo >> s);
#else // RYU_32_BIT_PLATFORM
  // (bits0 >> 32) + bits1 equals floor(m * factor / 2^32) and cannot overflow
  // 64 bits, so the remaining shift is shift - 32.
  const uint64_t sum = (bits0 >> 32) + bits1;
  const uint64_t shiftedSum = sum >> (shift - 32);
  assert(shiftedSum <= UINT32_MAX);
  return (uint32_t)shiftedSum;
#endif // RYU_32_BIT_PLATFORM
}
|
||
static inline uint32_t mulPow5InvDivPow2(const uint32_t m, const uint32_t q, const int32_t j) { | ||
#if defined(RYU_FLOAT_FULL_TABLE) | ||
return mulShift32(m, FLOAT_POW5_INV_SPLIT[q], j); | ||
#elif defined(RYU_OPTIMIZE_SIZE) | ||
// The inverse multipliers are defined as [2^x / 5^y] + 1; the upper 64 bits from the double lookup | ||
// table are the correct bits for [2^x / 5^y], so we have to add 1 here. Note that we rely on the | ||
// fact that the added 1 that's already stored in the table never overflows into the upper 64 bits. | ||
uint64_t pow5[2]; | ||
double_computeInvPow5(q, pow5); | ||
return mulShift32(m, pow5[1] + 1, j); | ||
#else | ||
return mulShift32(m, DOUBLE_POW5_INV_SPLIT[q][1] + 1, j); | ||
#endif | ||
} | ||
|
||
static inline uint32_t mulPow5divPow2(const uint32_t m, const uint32_t i, const int32_t j) { | ||
#if defined(RYU_FLOAT_FULL_TABLE) | ||
return mulShift32(m, FLOAT_POW5_SPLIT[i], j); | ||
#elif defined(RYU_OPTIMIZE_SIZE) | ||
uint64_t pow5[2]; | ||
double_computePow5(i, pow5); | ||
return mulShift32(m, pow5[1], j); | ||
#else | ||
return mulShift32(m, DOUBLE_POW5_SPLIT[i][1], j); | ||
#endif | ||
} | ||
|
||
#endif // RYU_F2S_INTRINSICS_H |