Skip to content

Commit

Permalink
Merge pull request #8832 from rouault/update_fast_float
Browse files Browse the repository at this point in the history
Resync third_party/fast_float with upstream
  • Loading branch information
rouault authored Nov 27, 2023
2 parents 978cc1b + fb478a0 commit a3cd6a3
Show file tree
Hide file tree
Showing 7 changed files with 433 additions and 162 deletions.
2 changes: 1 addition & 1 deletion third_party/fast_float/PROVENANCE.TXT
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
https://github.com/fastfloat/fast_float
Retrieved at commit https://github.com/fastfloat/fast_float/commit/fe571b1da7f9a61bf644d6e5ff3044f7ad2e27d8
Retrieved at commit https://github.com/fastfloat/fast_float/commit/a5ea2059295260922aa300d676a43a76b5e19a35

Using the MIT license choice.
262 changes: 214 additions & 48 deletions third_party/fast_float/ascii_number.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,29 @@
#include <cstdint>
#include <cstring>
#include <iterator>
#include <type_traits>

#include "float_common.h"

#ifdef FASTFLOAT_SSE2
#include <emmintrin.h>
#endif

#ifdef FASTFLOAT_NEON
#include <arm_neon.h>
#endif

namespace fast_float {

// Compile-time query: tells whether a SIMD-accelerated digit path is available
// for the code-unit type UC. The answer is true only for char16_t, and only in
// builds where FASTFLOAT_HAS_SIMD is defined; otherwise it is always false.
template <typename UC>
fastfloat_really_inline constexpr bool has_simd_opt() {
#ifndef FASTFLOAT_HAS_SIMD
  return false;
#else
  return std::is_same<UC, char16_t>::value;
#endif
}

// Next function can be micro-optimized, but compilers are entirely
// able to optimize it well.
template <typename UC>
Expand All @@ -28,12 +46,14 @@ fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) {
| (val & 0x00000000000000FF) << 56;
}

// Read 8 UC into a u64. Truncates UC if not char.
template <typename UC>
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
uint64_t read_u64(const char *chars) {
if (cpp20_and_in_constexpr()) {
uint64_t read8_to_u64(const UC *chars) {
if (cpp20_and_in_constexpr() || !std::is_same<UC, char>::value) {
uint64_t val = 0;
for(int i = 0; i < 8; ++i) {
val |= uint64_t(*chars) << (i*8);
val |= uint64_t(uint8_t(*chars)) << (i*8);
++chars;
}
return val;
Expand All @@ -47,6 +67,62 @@ uint64_t read_u64(const char *chars) {
return val;
}

#ifdef FASTFLOAT_SSE2

// SSE2: squeeze the eight 16-bit lanes of `data` down to eight bytes and hand
// them back as one uint64_t (the low 64 bits of the packed register).
fastfloat_really_inline
uint64_t simd_read8_to_u64(const __m128i data) {
FASTFLOAT_SIMD_DISABLE_WARNINGS
  // Narrow every 16-bit lane to 8 bits (unsigned saturation); passing `data`
  // for both operands leaves the eight result bytes in the low half.
  const __m128i narrowed = _mm_packus_epi16(data, data);
#ifndef FASTFLOAT_64BIT
  // No 64-bit register move in this configuration: spill the low half.
  // Visual Studio + older versions of GCC don't support _mm_storeu_si64
  uint64_t low_half;
  _mm_storel_epi64(reinterpret_cast<__m128i*>(&low_half), narrowed);
  return low_half;
#else
  return uint64_t(_mm_cvtsi128_si64(narrowed));
#endif
FASTFLOAT_SIMD_RESTORE_WARNINGS
}

// SSE2: load eight char16_t code units (16 bytes, no alignment requirement)
// starting at `chars` and pack them into a uint64_t via the __m128i overload.
// The caller must guarantee that eight code units are readable.
fastfloat_really_inline
uint64_t simd_read8_to_u64(const char16_t* chars) {
FASTFLOAT_SIMD_DISABLE_WARNINGS
  const __m128i eight_units =
      _mm_loadu_si128(reinterpret_cast<const __m128i*>(chars));
  return simd_read8_to_u64(eight_units);
FASTFLOAT_SIMD_RESTORE_WARNINGS
}

#elif defined(FASTFLOAT_NEON)


// NEON: narrow the eight 16-bit lanes of `data` to eight bytes and return them
// as a single uint64_t.
fastfloat_really_inline
uint64_t simd_read8_to_u64(const uint16x8_t data) {
FASTFLOAT_SIMD_DISABLE_WARNINGS
  // Keep the low byte of each 16-bit lane.
  const uint8x8_t narrowed = vmovn_u16(data);
  // View the eight bytes as one 64-bit lane and extract it.
  const uint64x1_t as_single_lane = vreinterpret_u64_u8(narrowed);
  return vget_lane_u64(as_single_lane, 0);
FASTFLOAT_SIMD_RESTORE_WARNINGS
}

// NEON: load eight char16_t code units starting at `chars` and pack them into
// a uint64_t via the uint16x8_t overload. The caller must guarantee that eight
// code units are readable.
fastfloat_really_inline
uint64_t simd_read8_to_u64(const char16_t* chars) {
FASTFLOAT_SIMD_DISABLE_WARNINGS
  const uint16x8_t eight_units =
      vld1q_u16(reinterpret_cast<const uint16_t*>(chars));
  return simd_read8_to_u64(eight_units);
FASTFLOAT_SIMD_RESTORE_WARNINGS
}

#endif // FASTFLOAT_SSE2

// Placeholder overload for code-unit types that have no SIMD reader. It exists
// only so that generic code naming simd_read8_to_u64() still compiles; it reads
// nothing and always yields 0.
// The overload is constrained to types without SIMD support, except on MSVC
// up to VS2015 (_MSC_VER <= 1900), where SFINAE is broken and the constraint
// is dropped.
#if !defined(_MSC_VER) || _MSC_VER > 1900
template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opt<UC>())>
#else
template <typename UC>
#endif
uint64_t simd_read8_to_u64(UC const*) {
  return 0;
}


fastfloat_really_inline FASTFLOAT_CONSTEXPR20
void write_u64(uint8_t *chars, uint64_t val) {
if (cpp20_and_in_constexpr()) {
Expand Down Expand Up @@ -76,40 +152,103 @@ uint32_t parse_eight_digits_unrolled(uint64_t val) {
return uint32_t(val);
}

// char16_t overload: the pointer is ignored and the result is always 0.
// NOTE(review): appears to be a compile-only placeholder for wide code units;
// confirm against callers before relying on the returned value.
fastfloat_really_inline constexpr
uint32_t parse_eight_digits_unrolled(const char16_t *) noexcept {
  return uint32_t(0);
}

// char32_t overload: the pointer is ignored and the result is always 0.
// NOTE(review): appears to be a compile-only placeholder for wide code units;
// confirm against callers before relying on the returned value.
fastfloat_really_inline constexpr
uint32_t parse_eight_digits_unrolled(const char32_t *) noexcept {
  return uint32_t(0);
}

// Call this if chars are definitely 8 digits.
template <typename UC>
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
uint32_t parse_eight_digits_unrolled(const char *chars) noexcept {
return parse_eight_digits_unrolled(read_u64(chars));
uint32_t parse_eight_digits_unrolled(UC const * chars) noexcept {
if (cpp20_and_in_constexpr() || !has_simd_opt<UC>()) {
return parse_eight_digits_unrolled(read8_to_u64(chars)); // truncation okay
}
return parse_eight_digits_unrolled(simd_read8_to_u64(chars));
}


// credit @aqrit
fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val) noexcept {
fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val) noexcept {
return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) &
0x8080808080808080));
}

fastfloat_really_inline constexpr
bool is_made_of_eight_digits_fast(const char16_t *) noexcept {

#ifdef FASTFLOAT_HAS_SIMD

// Call this if chars might not be 8 digits.
// Using this style (instead of is_made_of_eight_digits_fast() then parse_eight_digits_unrolled())
// ensures we don't load SIMD registers twice.
//
// Loads eight char16_t code units from `chars`, tests all of them for being
// ASCII digits in one vector comparison and, only if every one is a digit,
// folds them into the accumulator: i = i * 100000000 + <eight-digit value>.
//
// chars: start of at least eight readable code units; the load is
//        unconditional, so the caller must check the remaining length first.
// i:     running integer value, updated in place on success and left
//        untouched otherwise. The multiplication may wrap.
// Returns true when eight digits were consumed into `i`, false otherwise
// (including during constant evaluation and in builds with neither SSE2 nor
// NEON).
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
bool simd_parse_if_eight_digits_unrolled(const char16_t* chars, uint64_t& i) noexcept {
// During constant evaluation, report "not parsed" before touching any
// intrinsic (presumably because they cannot be constant-evaluated).
if (cpp20_and_in_constexpr()) {
return false;
}
#ifdef FASTFLOAT_SSE2
FASTFLOAT_SIMD_DISABLE_WARNINGS
const __m128i data = _mm_loadu_si128(reinterpret_cast<const __m128i*>(chars));

// (x - '0') <= 9
// http://0x80.pl/articles/simd-parsing-int-sequences.html
// SSE2 only has a signed 16-bit compare, so the range test is shifted:
// 32720 == 0x8000 - '0', hence '0'..'9' wrap to -32768..-32759 and every
// other code unit ends up strictly greater than -32759.
const __m128i t0 = _mm_add_epi16(data, _mm_set1_epi16(32720));
// t1 has all bits set in each lane that is NOT a digit.
const __m128i t1 = _mm_cmpgt_epi16(t0, _mm_set1_epi16(-32759));

// An all-zero byte mask means no lane was flagged: all eight are digits.
if (_mm_movemask_epi8(t1) == 0) {
// Reuse the register that is already loaded instead of re-reading memory.
i = i * 100000000 + parse_eight_digits_unrolled(simd_read8_to_u64(data));
return true;
}
else return false;
FASTFLOAT_SIMD_RESTORE_WARNINGS
#elif defined(FASTFLOAT_NEON)
FASTFLOAT_SIMD_DISABLE_WARNINGS
const uint16x8_t data = vld1q_u16(reinterpret_cast<const uint16_t*>(chars));

// (x - '0') <= 9
// http://0x80.pl/articles/simd-parsing-int-sequences.html
// Unsigned subtraction wraps code units below '0' to large values, so a
// single unsigned "< 10" comparison covers both ends of the range.
const uint16x8_t t0 = vsubq_u16(data, vmovq_n_u16('0'));
// mask is 0xFFFF in each lane that IS a digit, 0 otherwise.
const uint16x8_t mask = vcltq_u16(t0, vmovq_n_u16('9' - '0' + 1));

// The minimum over all lanes is 0xFFFF only if every lane is a digit.
if (vminvq_u16(mask) == 0xFFFF) {
// Reuse the register that is already loaded instead of re-reading memory.
i = i * 100000000 + parse_eight_digits_unrolled(simd_read8_to_u64(data));
return true;
}
else return false;
FASTFLOAT_SIMD_RESTORE_WARNINGS
#else
// Neither SIMD flavour is available: mark the parameters as used and report
// that nothing was parsed.
(void)chars; (void)i;
return false;
#endif // FASTFLOAT_SSE2
}

fastfloat_really_inline constexpr
bool is_made_of_eight_digits_fast(const char32_t *) noexcept {
return false;
#endif // FASTFLOAT_HAS_SIMD

// Placeholder overload for code-unit types that have no SIMD parser. It exists
// only so that generic code naming simd_parse_if_eight_digits_unrolled() still
// compiles; it consumes nothing, leaves the accumulator untouched and always
// reports failure.
// The overload is constrained to types without SIMD support, except on MSVC
// up to VS2015 (_MSC_VER <= 1900), where SFINAE is broken and the constraint
// is dropped.
#if !defined(_MSC_VER) || _MSC_VER > 1900
template <typename UC, FASTFLOAT_ENABLE_IF(!has_simd_opt<UC>())>
#else
template <typename UC>
#endif
bool simd_parse_if_eight_digits_unrolled(UC const*, uint64_t&) {
  return false;
}


template <typename UC, FASTFLOAT_ENABLE_IF(!std::is_same<UC, char>::value)>
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
bool is_made_of_eight_digits_fast(const char *chars) noexcept {
return is_made_of_eight_digits_fast(read_u64(chars));
void loop_parse_if_eight_digits(const UC*& p, const UC* const pend, uint64_t& i) {
if (!has_simd_opt<UC>()) {
return;
}
while ((std::distance(p, pend) >= 8) && simd_parse_if_eight_digits_unrolled(p, i)) { // in rare cases, this will overflow, but that's ok
p += 8;
}
}

// char specialisation of the eight-digits-at-a-time loop.
// While at least eight characters remain in [p, pend) and the next eight are
// all ASCII digits, folds them into `i` (i = i * 100000000 + value) and
// advances `p` by eight. Stops at the first group that is too short or holds a
// non-digit, leaving `p` at the start of that group.
// This scalar form optimizes better than simd_parse_if_eight_digits_unrolled()
// for UC = char.
fastfloat_really_inline FASTFLOAT_CONSTEXPR20
void loop_parse_if_eight_digits(const char*& p, const char* const pend, uint64_t& i) {
  while ((pend - p) >= 8) {
    const uint64_t eight_chars = read8_to_u64(p);
    if (!is_made_of_eight_digits_fast(eight_chars)) {
      break;
    }
    // in rare cases, this will overflow, but that's ok
    i = i * 100000000 + parse_eight_digits_unrolled(eight_chars);
    p += 8;
  }
}

template <typename UC>
Expand All @@ -124,8 +263,10 @@ struct parsed_number_string_t {
span<const UC> integer{}; // non-nullable
span<const UC> fraction{}; // nullable
};
using byte_span = span<char>;

using byte_span = span<const char>;
using parsed_number_string = parsed_number_string_t<char>;

// Assuming that you use no more than 19 digits, this will
// parse an ASCII string.
template <typename UC>
Expand All @@ -139,16 +280,22 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
answer.too_many_digits = false;
answer.negative = (*p == UC('-'));
#ifdef FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default
if ((*p == UC('-')) || (*p == UC('+'))) {
if ((*p == UC('-')) || (!(fmt & FASTFLOAT_JSONFMT) && *p == UC('+'))) {
#else
if (*p == UC('-')) { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here
#endif
++p;
if (p == pend) {
return answer;
}
if (!is_integer(*p) && (*p != decimal_point)) { // a sign must be followed by an integer or the dot
return answer;
if (fmt & FASTFLOAT_JSONFMT) {
if (!is_integer(*p)) { // a sign must be followed by an integer
return answer;
}
} else {
if (!is_integer(*p) && (*p != decimal_point)) { // a sign must be followed by an integer or the dot
return answer;
}
}
}
UC const * const start_digits = p;
Expand All @@ -165,18 +312,22 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
UC const * const end_of_integer_part = p;
int64_t digit_count = int64_t(end_of_integer_part - start_digits);
answer.integer = span<const UC>(start_digits, size_t(digit_count));
if (fmt & FASTFLOAT_JSONFMT) {
// at least 1 digit in integer part, without leading zeros
if (digit_count == 0 || (start_digits[0] == UC('0') && digit_count > 1)) {
return answer;
}
}

int64_t exponent = 0;
if ((p != pend) && (*p == decimal_point)) {
const bool has_decimal_point = (p != pend) && (*p == decimal_point);
if (has_decimal_point) {
++p;
UC const * before = p;
// can occur at most twice without overflowing, but let it occur more, since
// for integers with many digits, digit parsing is the primary bottleneck.
if (std::is_same<UC,char>::value) {
while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) {
i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok
p += 8;
}
}
loop_parse_if_eight_digits(p, pend, i);

while ((p != pend) && is_integer(*p)) {
uint8_t digit = uint8_t(*p - UC('0'));
++p;
Expand All @@ -186,14 +337,27 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
answer.fraction = span<const UC>(before, size_t(p - before));
digit_count -= exponent;
}
// we must have encountered at least one integer!
if (digit_count == 0) {
if (fmt & FASTFLOAT_JSONFMT) {
// at least 1 digit in fractional part
if (has_decimal_point && exponent == 0) {
return answer;
}
}
else if (digit_count == 0) { // we must have encountered at least one integer!
return answer;
}
int64_t exp_number = 0; // explicit exponential part
if ((fmt & chars_format::scientific) && (p != pend) && ((UC('e') == *p) || (UC('E') == *p))) {
if ( ((fmt & chars_format::scientific) &&
(p != pend) &&
((UC('e') == *p) || (UC('E') == *p)))
||
((fmt & FASTFLOAT_FORTRANFMT) &&
(p != pend) &&
((UC('+') == *p) || (UC('-') == *p) || (UC('d') == *p) || (UC('D') == *p)))) {
UC const * location_of_e = p;
++p;
if ((UC('e') == *p) || (UC('E') == *p) || (UC('d') == *p) || (UC('D') == *p)) {
++p;
}
bool neg_exp = false;
if ((p != pend) && (UC('-') == *p)) {
neg_exp = true;
Expand Down Expand Up @@ -241,29 +405,31 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
if(*start == UC('0')) { digit_count --; }
start++;
}

if (digit_count > 19) {
answer.too_many_digits = true;
// Let us start again, this time, avoiding overflows.
// We don't need to check if is_integer, since we use the
// pre-tokenized spans from above.
i = 0;
p = answer.integer.ptr;
UC const * int_end = p + answer.integer.len();
const uint64_t minimal_nineteen_digit_integer{1000000000000000000};
while((i < minimal_nineteen_digit_integer) && (p != int_end)) {
UC const* int_end = p + answer.integer.len();
const uint64_t minimal_nineteen_digit_integer{ 1000000000000000000 };
while ((i < minimal_nineteen_digit_integer) && (p != int_end)) {
i = i * 10 + uint64_t(*p - UC('0'));
++p;
}
if (i >= minimal_nineteen_digit_integer) { // We have a big integers
exponent = end_of_integer_part - p + exp_number;
} else { // We have a value with a fractional component.
p = answer.fraction.ptr;
UC const * frac_end = p + answer.fraction.len();
while((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
i = i * 10 + uint64_t(*p - UC('0'));
++p;
}
exponent = answer.fraction.ptr - p + exp_number;
}
else { // We have a value with a fractional component.
p = answer.fraction.ptr;
UC const* frac_end = p + answer.fraction.len();
while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) {
i = i * 10 + uint64_t(*p - UC('0'));
++p;
}
exponent = answer.fraction.ptr - p + exp_number;
}
// We have now corrected both exponent and i, to a truncated value
}
Expand Down
Loading

0 comments on commit a3cd6a3

Please sign in to comment.