From 54bad34baa84c7d45c576702bc2a3c60ef260c36 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Sun, 26 Nov 2023 23:38:13 +0100 Subject: [PATCH 1/2] Add third_party/fast_float/resync_with_upstream.sh --- .../fast_float/resync_with_upstream.sh | 29 +++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100755 third_party/fast_float/resync_with_upstream.sh diff --git a/third_party/fast_float/resync_with_upstream.sh b/third_party/fast_float/resync_with_upstream.sh new file mode 100755 index 000000000000..1c9d72141ce4 --- /dev/null +++ b/third_party/fast_float/resync_with_upstream.sh @@ -0,0 +1,29 @@ +#!/bin/sh + +set -eu + +SCRIPT_DIR=$(dirname "$0") +case $SCRIPT_DIR in + "/"*) + ;; + ".") + SCRIPT_DIR=$(pwd) + ;; + *) + SCRIPT_DIR=$(pwd)/$(dirname "$0") + ;; +esac +cd "${SCRIPT_DIR}" + +rm -rf tmp_fast_float +git clone --depth 1 https://github.com/fastfloat/fast_float tmp_fast_float +cp tmp_fast_float/include/fast_float/* "${SCRIPT_DIR}/" + +cat > "${SCRIPT_DIR}/PROVENANCE.TXT" << EOF +https://github.com/fastfloat/fast_float +Retrieved at commit https://github.com/fastfloat/fast_float/commit/$(git -C tmp_fast_float rev-parse HEAD) + +Using the MIT license choice. 
+EOF + +rm -rf tmp_fast_float From fb478a03b1ddc29f6267c14772ef6431d1e84a25 Mon Sep 17 00:00:00 2001 From: Even Rouault Date: Sun, 26 Nov 2023 23:38:34 +0100 Subject: [PATCH 2/2] Resync third_party/fast_float with upstream --- third_party/fast_float/PROVENANCE.TXT | 2 +- third_party/fast_float/ascii_number.h | 262 ++++++++++++++++++---- third_party/fast_float/digit_comparison.h | 24 +- third_party/fast_float/fast_float.h | 32 +-- third_party/fast_float/float_common.h | 236 +++++++++++++------ third_party/fast_float/parse_number.h | 10 +- 6 files changed, 404 insertions(+), 162 deletions(-) diff --git a/third_party/fast_float/PROVENANCE.TXT b/third_party/fast_float/PROVENANCE.TXT index a66256f7e2b5..0ea9d9ccd1d2 100644 --- a/third_party/fast_float/PROVENANCE.TXT +++ b/third_party/fast_float/PROVENANCE.TXT @@ -1,4 +1,4 @@ https://github.com/fastfloat/fast_float -Retrieved at commit https://github.com/fastfloat/fast_float/commit/fe571b1da7f9a61bf644d6e5ff3044f7ad2e27d8 +Retrieved at commit https://github.com/fastfloat/fast_float/commit/a5ea2059295260922aa300d676a43a76b5e19a35 Using the MIT license choice. diff --git a/third_party/fast_float/ascii_number.h b/third_party/fast_float/ascii_number.h index d506326ec9a6..d18e3d5360af 100644 --- a/third_party/fast_float/ascii_number.h +++ b/third_party/fast_float/ascii_number.h @@ -5,11 +5,29 @@ #include #include #include +#include #include "float_common.h" +#ifdef FASTFLOAT_SSE2 +#include +#endif + +#ifdef FASTFLOAT_NEON +#include +#endif + namespace fast_float { +template +fastfloat_really_inline constexpr bool has_simd_opt() { +#ifdef FASTFLOAT_HAS_SIMD + return std::is_same::value; +#else + return false; +#endif +} + // Next function can be micro-optimized, but compilers are entirely // able to optimize it well. template @@ -28,12 +46,14 @@ fastfloat_really_inline constexpr uint64_t byteswap(uint64_t val) { | (val & 0x00000000000000FF) << 56; } +// Read 8 UC into a u64. Truncates UC if not char. 
+template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -uint64_t read_u64(const char *chars) { - if (cpp20_and_in_constexpr()) { +uint64_t read8_to_u64(const UC *chars) { + if (cpp20_and_in_constexpr() || !std::is_same::value) { uint64_t val = 0; for(int i = 0; i < 8; ++i) { - val |= uint64_t(*chars) << (i*8); + val |= uint64_t(uint8_t(*chars)) << (i*8); ++chars; } return val; @@ -47,6 +67,62 @@ uint64_t read_u64(const char *chars) { return val; } +#ifdef FASTFLOAT_SSE2 + +fastfloat_really_inline +uint64_t simd_read8_to_u64(const __m128i data) { +FASTFLOAT_SIMD_DISABLE_WARNINGS + const __m128i packed = _mm_packus_epi16(data, data); +#ifdef FASTFLOAT_64BIT + return uint64_t(_mm_cvtsi128_si64(packed)); +#else + uint64_t value; + // Visual Studio + older versions of GCC don't support _mm_storeu_si64 + _mm_storel_epi64(reinterpret_cast<__m128i*>(&value), packed); + return value; +#endif +FASTFLOAT_SIMD_RESTORE_WARNINGS +} + +fastfloat_really_inline +uint64_t simd_read8_to_u64(const char16_t* chars) { +FASTFLOAT_SIMD_DISABLE_WARNINGS + return simd_read8_to_u64(_mm_loadu_si128(reinterpret_cast(chars))); +FASTFLOAT_SIMD_RESTORE_WARNINGS +} + +#elif defined(FASTFLOAT_NEON) + + +fastfloat_really_inline +uint64_t simd_read8_to_u64(const uint16x8_t data) { +FASTFLOAT_SIMD_DISABLE_WARNINGS + uint8x8_t utf8_packed = vmovn_u16(data); + return vget_lane_u64(vreinterpret_u64_u8(utf8_packed), 0); +FASTFLOAT_SIMD_RESTORE_WARNINGS +} + +fastfloat_really_inline +uint64_t simd_read8_to_u64(const char16_t* chars) { +FASTFLOAT_SIMD_DISABLE_WARNINGS + return simd_read8_to_u64(vld1q_u16(reinterpret_cast(chars))); +FASTFLOAT_SIMD_RESTORE_WARNINGS +} + +#endif // FASTFLOAT_SSE2 + +// MSVC SFINAE is broken pre-VS2017 +#if defined(_MSC_VER) && _MSC_VER <= 1900 +template +#else +template ())> +#endif +// dummy for compile +uint64_t simd_read8_to_u64(UC const*) { + return 0; +} + + fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void write_u64(uint8_t *chars, uint64_t val) { if 
(cpp20_and_in_constexpr()) { @@ -76,40 +152,103 @@ uint32_t parse_eight_digits_unrolled(uint64_t val) { return uint32_t(val); } -fastfloat_really_inline constexpr -uint32_t parse_eight_digits_unrolled(const char16_t *) noexcept { - return 0; -} - -fastfloat_really_inline constexpr -uint32_t parse_eight_digits_unrolled(const char32_t *) noexcept { - return 0; -} +// Call this if chars are definitely 8 digits. +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -uint32_t parse_eight_digits_unrolled(const char *chars) noexcept { - return parse_eight_digits_unrolled(read_u64(chars)); +uint32_t parse_eight_digits_unrolled(UC const * chars) noexcept { + if (cpp20_and_in_constexpr() || !has_simd_opt()) { + return parse_eight_digits_unrolled(read8_to_u64(chars)); // truncation okay + } + return parse_eight_digits_unrolled(simd_read8_to_u64(chars)); } + // credit @aqrit -fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val) noexcept { +fastfloat_really_inline constexpr bool is_made_of_eight_digits_fast(uint64_t val) noexcept { return !((((val + 0x4646464646464646) | (val - 0x3030303030303030)) & 0x8080808080808080)); } -fastfloat_really_inline constexpr -bool is_made_of_eight_digits_fast(const char16_t *) noexcept { + +#ifdef FASTFLOAT_HAS_SIMD + +// Call this if chars might not be 8 digits. +// Using this style (instead of is_made_of_eight_digits_fast() then parse_eight_digits_unrolled()) +// ensures we don't load SIMD registers twice. 
+fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +bool simd_parse_if_eight_digits_unrolled(const char16_t* chars, uint64_t& i) noexcept { + if (cpp20_and_in_constexpr()) { + return false; + } +#ifdef FASTFLOAT_SSE2 +FASTFLOAT_SIMD_DISABLE_WARNINGS + const __m128i data = _mm_loadu_si128(reinterpret_cast(chars)); + + // (x - '0') <= 9 + // http://0x80.pl/articles/simd-parsing-int-sequences.html + const __m128i t0 = _mm_add_epi16(data, _mm_set1_epi16(32720)); + const __m128i t1 = _mm_cmpgt_epi16(t0, _mm_set1_epi16(-32759)); + + if (_mm_movemask_epi8(t1) == 0) { + i = i * 100000000 + parse_eight_digits_unrolled(simd_read8_to_u64(data)); + return true; + } + else return false; +FASTFLOAT_SIMD_RESTORE_WARNINGS +#elif defined(FASTFLOAT_NEON) +FASTFLOAT_SIMD_DISABLE_WARNINGS + const uint16x8_t data = vld1q_u16(reinterpret_cast(chars)); + + // (x - '0') <= 9 + // http://0x80.pl/articles/simd-parsing-int-sequences.html + const uint16x8_t t0 = vsubq_u16(data, vmovq_n_u16('0')); + const uint16x8_t mask = vcltq_u16(t0, vmovq_n_u16('9' - '0' + 1)); + + if (vminvq_u16(mask) == 0xFFFF) { + i = i * 100000000 + parse_eight_digits_unrolled(simd_read8_to_u64(data)); + return true; + } + else return false; +FASTFLOAT_SIMD_RESTORE_WARNINGS +#else + (void)chars; (void)i; return false; +#endif // FASTFLOAT_SSE2 } -fastfloat_really_inline constexpr -bool is_made_of_eight_digits_fast(const char32_t *) noexcept { - return false; +#endif // FASTFLOAT_HAS_SIMD + +// MSVC SFINAE is broken pre-VS2017 +#if defined(_MSC_VER) && _MSC_VER <= 1900 +template +#else +template ())> +#endif +// dummy for compile +bool simd_parse_if_eight_digits_unrolled(UC const*, uint64_t&) { + return 0; } + +template ::value)> fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -bool is_made_of_eight_digits_fast(const char *chars) noexcept { - return is_made_of_eight_digits_fast(read_u64(chars)); +void loop_parse_if_eight_digits(const UC*& p, const UC* const pend, uint64_t& i) { + if (!has_simd_opt()) { + return; + } + while 
((std::distance(p, pend) >= 8) && simd_parse_if_eight_digits_unrolled(p, i)) { // in rare cases, this will overflow, but that's ok + p += 8; + } +} + +fastfloat_really_inline FASTFLOAT_CONSTEXPR20 +void loop_parse_if_eight_digits(const char*& p, const char* const pend, uint64_t& i) { + // optimizes better than parse_if_eight_digits_unrolled() for UC = char. + while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(read8_to_u64(p))) { + i = i * 100000000 + parse_eight_digits_unrolled(read8_to_u64(p)); // in rare cases, this will overflow, but that's ok + p += 8; + } } template @@ -124,8 +263,10 @@ struct parsed_number_string_t { span integer{}; // non-nullable span fraction{}; // nullable }; -using byte_span = span; + +using byte_span = span; using parsed_number_string = parsed_number_string_t; + // Assuming that you use no more than 19 digits, this will // parse an ASCII string. template @@ -139,7 +280,7 @@ parsed_number_string_t parse_number_string(UC const *p, UC const * pend, par answer.too_many_digits = false; answer.negative = (*p == UC('-')); #ifdef FASTFLOAT_ALLOWS_LEADING_PLUS // disabled by default - if ((*p == UC('-')) || (*p == UC('+'))) { + if ((*p == UC('-')) || (!(fmt & FASTFLOAT_JSONFMT) && *p == UC('+'))) { #else if (*p == UC('-')) { // C++17 20.19.3.(7.1) explicitly forbids '+' sign here #endif @@ -147,8 +288,14 @@ parsed_number_string_t parse_number_string(UC const *p, UC const * pend, par if (p == pend) { return answer; } - if (!is_integer(*p) && (*p != decimal_point)) { // a sign must be followed by an integer or the dot - return answer; + if (fmt & FASTFLOAT_JSONFMT) { + if (!is_integer(*p)) { // a sign must be followed by an integer + return answer; + } + } else { + if (!is_integer(*p) && (*p != decimal_point)) { // a sign must be followed by an integer or the dot + return answer; + } } } UC const * const start_digits = p; @@ -165,18 +312,22 @@ parsed_number_string_t parse_number_string(UC const *p, UC const * pend, par UC const * 
const end_of_integer_part = p; int64_t digit_count = int64_t(end_of_integer_part - start_digits); answer.integer = span(start_digits, size_t(digit_count)); + if (fmt & FASTFLOAT_JSONFMT) { + // at least 1 digit in integer part, without leading zeros + if (digit_count == 0 || (start_digits[0] == UC('0') && digit_count > 1)) { + return answer; + } + } + int64_t exponent = 0; - if ((p != pend) && (*p == decimal_point)) { + const bool has_decimal_point = (p != pend) && (*p == decimal_point); + if (has_decimal_point) { ++p; UC const * before = p; // can occur at most twice without overflowing, but let it occur more, since // for integers with many digits, digit parsing is the primary bottleneck. - if (std::is_same::value) { - while ((std::distance(p, pend) >= 8) && is_made_of_eight_digits_fast(p)) { - i = i * 100000000 + parse_eight_digits_unrolled(p); // in rare cases, this will overflow, but that's ok - p += 8; - } - } + loop_parse_if_eight_digits(p, pend, i); + while ((p != pend) && is_integer(*p)) { uint8_t digit = uint8_t(*p - UC('0')); ++p; @@ -186,14 +337,27 @@ parsed_number_string_t parse_number_string(UC const *p, UC const * pend, par answer.fraction = span(before, size_t(p - before)); digit_count -= exponent; } - // we must have encountered at least one integer! - if (digit_count == 0) { + if (fmt & FASTFLOAT_JSONFMT) { + // at least 1 digit in fractional part + if (has_decimal_point && exponent == 0) { + return answer; + } + } + else if (digit_count == 0) { // we must have encountered at least one integer! 
return answer; } int64_t exp_number = 0; // explicit exponential part - if ((fmt & chars_format::scientific) && (p != pend) && ((UC('e') == *p) || (UC('E') == *p))) { + if ( ((fmt & chars_format::scientific) && + (p != pend) && + ((UC('e') == *p) || (UC('E') == *p))) + || + ((fmt & FASTFLOAT_FORTRANFMT) && + (p != pend) && + ((UC('+') == *p) || (UC('-') == *p) || (UC('d') == *p) || (UC('D') == *p)))) { UC const * location_of_e = p; - ++p; + if ((UC('e') == *p) || (UC('E') == *p) || (UC('d') == *p) || (UC('D') == *p)) { + ++p; + } bool neg_exp = false; if ((p != pend) && (UC('-') == *p)) { neg_exp = true; @@ -241,6 +405,7 @@ parsed_number_string_t parse_number_string(UC const *p, UC const * pend, par if(*start == UC('0')) { digit_count --; } start++; } + if (digit_count > 19) { answer.too_many_digits = true; // Let us start again, this time, avoiding overflows. @@ -248,22 +413,23 @@ parsed_number_string_t parse_number_string(UC const *p, UC const * pend, par // pre-tokenized spans from above. i = 0; p = answer.integer.ptr; - UC const * int_end = p + answer.integer.len(); - const uint64_t minimal_nineteen_digit_integer{1000000000000000000}; - while((i < minimal_nineteen_digit_integer) && (p != int_end)) { + UC const* int_end = p + answer.integer.len(); + const uint64_t minimal_nineteen_digit_integer{ 1000000000000000000 }; + while ((i < minimal_nineteen_digit_integer) && (p != int_end)) { i = i * 10 + uint64_t(*p - UC('0')); ++p; } if (i >= minimal_nineteen_digit_integer) { // We have a big integers exponent = end_of_integer_part - p + exp_number; - } else { // We have a value with a fractional component. - p = answer.fraction.ptr; - UC const * frac_end = p + answer.fraction.len(); - while((i < minimal_nineteen_digit_integer) && (p != frac_end)) { - i = i * 10 + uint64_t(*p - UC('0')); - ++p; - } - exponent = answer.fraction.ptr - p + exp_number; + } + else { // We have a value with a fractional component. 
+ p = answer.fraction.ptr; + UC const* frac_end = p + answer.fraction.len(); + while ((i < minimal_nineteen_digit_integer) && (p != frac_end)) { + i = i * 10 + uint64_t(*p - UC('0')); + ++p; + } + exponent = answer.fraction.ptr - p + exp_number; } // We have now corrected both exponent and i, to a truncated value } diff --git a/third_party/fast_float/digit_comparison.h b/third_party/fast_float/digit_comparison.h index f469f6b55353..512a27f5a5f4 100644 --- a/third_party/fast_float/digit_comparison.h +++ b/third_party/fast_float/digit_comparison.h @@ -201,18 +201,10 @@ bool is_truncated(span s) noexcept { return is_truncated(s.ptr, s.ptr + s.len()); } -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -void parse_eight_digits(const char16_t*& , limb& , size_t& , size_t& ) noexcept { - // currently unused -} - -fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -void parse_eight_digits(const char32_t*& , limb& , size_t& , size_t& ) noexcept { - // currently unused -} +template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 -void parse_eight_digits(const char*& p, limb& value, size_t& counter, size_t& count) noexcept { +void parse_eight_digits(const UC*& p, limb& value, size_t& counter, size_t& count) noexcept { value = value * 100000000 + parse_eight_digits_unrolled(p); p += 8; counter += 8; @@ -264,10 +256,8 @@ void parse_mantissa(bigint& result, parsed_number_string_t& num, size_t max_ skip_zeros(p, pend); // process all digits, in increments of step per loop while (p != pend) { - if (std::is_same::value) { - while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && (max_digits - digits >= 8)) { - parse_eight_digits(p, value, counter, digits); - } + while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && (max_digits - digits >= 8)) { + parse_eight_digits(p, value, counter, digits); } while (counter < step && p != pend && digits < max_digits) { parse_one_digit(p, value, counter, digits); @@ -299,10 +289,8 @@ void parse_mantissa(bigint& result, 
parsed_number_string_t& num, size_t max_ } // process all digits, in increments of step per loop while (p != pend) { - if (std::is_same::value) { - while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && (max_digits - digits >= 8)) { - parse_eight_digits(p, value, counter, digits); - } + while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && (max_digits - digits >= 8)) { + parse_eight_digits(p, value, counter, digits); } while (counter < step && p != pend && digits < max_digits) { parse_one_digit(p, value, counter, digits); diff --git a/third_party/fast_float/fast_float.h b/third_party/fast_float/fast_float.h index 1cc25f490b5e..04efa877ee7b 100644 --- a/third_party/fast_float/fast_float.h +++ b/third_party/fast_float/fast_float.h @@ -1,38 +1,10 @@ + #ifndef FASTFLOAT_FAST_FLOAT_H #define FASTFLOAT_FAST_FLOAT_H -#include - -#include "constexpr_feature_detect.h" +#include "float_common.h" namespace fast_float { -enum chars_format { - scientific = 1<<0, - fixed = 1<<2, - hex = 1<<3, - general = fixed | scientific -}; - -template -struct from_chars_result_t { - UC const * ptr; - std::errc ec; -}; -using from_chars_result = from_chars_result_t; - -template -struct parse_options_t { - constexpr explicit parse_options_t(chars_format fmt = chars_format::general, - UC dot = UC('.')) - : format(fmt), decimal_point(dot) {} - - /** Which number formats are accepted */ - chars_format format; - /** The character used as decimal point */ - UC decimal_point; -}; -using parse_options = parse_options_t; - /** * This function parses the character sequence [first,last) for a number. It parses floating-point numbers expecting * a locale-indepent format equivalent to what is used by std::strtod in the default ("C") locale. 
diff --git a/third_party/fast_float/float_common.h b/third_party/fast_float/float_common.h index 6901d7c6fa2e..bee882152025 100644 --- a/third_party/fast_float/float_common.h +++ b/third_party/fast_float/float_common.h @@ -6,6 +6,49 @@ #include #include #include +#include + +#include "constexpr_feature_detect.h" + +namespace fast_float { + +#define FASTFLOAT_JSONFMT (1 << 5) +#define FASTFLOAT_FORTRANFMT (1 << 6) + +enum chars_format { + scientific = 1 << 0, + fixed = 1 << 2, + hex = 1 << 3, + no_infnan = 1 << 4, + // RFC 8259: https://datatracker.ietf.org/doc/html/rfc8259#section-6 + json = FASTFLOAT_JSONFMT | fixed | scientific | no_infnan, + // Extension of RFC 8259 where, e.g., "inf" and "nan" are allowed. + json_or_infnan = FASTFLOAT_JSONFMT | fixed | scientific, + fortran = FASTFLOAT_FORTRANFMT | fixed | scientific, + general = fixed | scientific +}; + +template +struct from_chars_result_t { + UC const* ptr; + std::errc ec; +}; +using from_chars_result = from_chars_result_t; + +template +struct parse_options_t { + constexpr explicit parse_options_t(chars_format fmt = chars_format::general, + UC dot = UC('.')) + : format(fmt), decimal_point(dot) {} + + /** Which number formats are accepted */ + chars_format format; + /** The character used as decimal point */ + UC decimal_point; +}; +using parse_options = parse_options_t; + +} #if FASTFLOAT_HAS_BIT_CAST #include @@ -15,7 +58,8 @@ || defined(__amd64) || defined(__aarch64__) || defined(_M_ARM64) \ || defined(__MINGW64__) \ || defined(__s390x__) \ - || (defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || defined(__PPC64LE__)) ) + || (defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || defined(__PPC64LE__)) \ + || defined(__loongarch64) ) #define FASTFLOAT_64BIT 1 #elif (defined(__i386) || defined(__i386__) || defined(_M_IX86) \ || defined(__arm__) || defined(_M_ARM) || defined(__ppc__) \ @@ -53,6 +97,8 @@ #include #elif defined(sun) || defined(__sun) #include +#elif 
defined(__MVS__) +#include #else #ifdef __has_include #if __has_include() @@ -78,6 +124,38 @@ #endif #endif +#if defined(__SSE2__) || \ + (defined(FASTFLOAT_VISUAL_STUDIO) && \ + (defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP == 2))) +#define FASTFLOAT_SSE2 1 +#endif + +#if defined(__aarch64__) || defined(_M_ARM64) +#define FASTFLOAT_NEON 1 +#endif + +#if defined(FASTFLOAT_SSE2) || defined(FASTFLOAT_NEON) +#define FASTFLOAT_HAS_SIMD 1 +#endif + +#if defined(__GNUC__) +// disable -Wcast-align=strict (GCC only) +#define FASTFLOAT_SIMD_DISABLE_WARNINGS \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wcast-align\"") +#else +#define FASTFLOAT_SIMD_DISABLE_WARNINGS +#endif + +#if defined(__GNUC__) +#define FASTFLOAT_SIMD_RESTORE_WARNINGS \ + _Pragma("GCC diagnostic pop") +#else +#define FASTFLOAT_SIMD_RESTORE_WARNINGS +#endif + + + #ifdef FASTFLOAT_VISUAL_STUDIO #define fastfloat_really_inline __forceinline #else @@ -95,6 +173,9 @@ // rust style `try!()` macro, or `?` operator #define FASTFLOAT_TRY(x) { if (!(x)) return false; } +#define FASTFLOAT_ENABLE_IF(...) 
typename std::enable_if<(__VA_ARGS__), int>::type = 0 + + namespace fast_float { fastfloat_really_inline constexpr bool cpp20_and_in_constexpr() { @@ -145,18 +226,16 @@ struct value128 { constexpr value128() : low(0), high(0) {} }; -/* Helper C++11 constexpr generic implementation of leading_zeroes */ -fastfloat_really_inline constexpr +/* Helper C++14 constexpr generic implementation of leading_zeroes */ +fastfloat_really_inline FASTFLOAT_CONSTEXPR14 int leading_zeroes_generic(uint64_t input_num, int last_bit = 0) { - return ( - ((input_num & uint64_t(0xffffffff00000000)) && (input_num >>= 32, last_bit |= 32)), - ((input_num & uint64_t( 0xffff0000)) && (input_num >>= 16, last_bit |= 16)), - ((input_num & uint64_t( 0xff00)) && (input_num >>= 8, last_bit |= 8)), - ((input_num & uint64_t( 0xf0)) && (input_num >>= 4, last_bit |= 4)), - ((input_num & uint64_t( 0xc)) && (input_num >>= 2, last_bit |= 2)), - ((input_num & uint64_t( 0x2)) && (input_num >>= 1, last_bit |= 1)), - 63 - last_bit - ); + if(input_num & uint64_t(0xffffffff00000000)) { input_num >>= 32; last_bit |= 32; } + if(input_num & uint64_t( 0xffff0000)) { input_num >>= 16; last_bit |= 16; } + if(input_num & uint64_t( 0xff00)) { input_num >>= 8; last_bit |= 8; } + if(input_num & uint64_t( 0xf0)) { input_num >>= 4; last_bit |= 4; } + if(input_num & uint64_t( 0xc)) { input_num >>= 2; last_bit |= 2; } + if(input_num & uint64_t( 0x2)) { input_num >>= 1; last_bit |= 1; } + return 63 - last_bit; } /* result might be undefined when input_num is zero */ @@ -252,16 +331,43 @@ struct adjusted_mantissa { // Bias so we can get the real exponent with an invalid adjusted_mantissa. 
constexpr static int32_t invalid_am_bias = -0x8000; -constexpr static double powers_of_ten_double[] = { - 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, - 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22}; -constexpr static float powers_of_ten_float[] = {1e0f, 1e1f, 1e2f, 1e3f, 1e4f, 1e5f, - 1e6f, 1e7f, 1e8f, 1e9f, 1e10f}; -// used for max_mantissa_double and max_mantissa_float +// used for binary_format_lookup_tables::max_mantissa constexpr uint64_t constant_55555 = 5 * 5 * 5 * 5 * 5; -// Largest integer value v so that (5**index * v) <= 1<<53. -// 0x10000000000000 == 1 << 53 -constexpr static uint64_t max_mantissa_double[] = { + +template +struct binary_format_lookup_tables; + +template struct binary_format : binary_format_lookup_tables { + using equiv_uint = typename std::conditional::type; + + static inline constexpr int mantissa_explicit_bits(); + static inline constexpr int minimum_exponent(); + static inline constexpr int infinite_power(); + static inline constexpr int sign_index(); + static inline constexpr int min_exponent_fast_path(); // used when fegetround() == FE_TONEAREST + static inline constexpr int max_exponent_fast_path(); + static inline constexpr int max_exponent_round_to_even(); + static inline constexpr int min_exponent_round_to_even(); + static inline constexpr uint64_t max_mantissa_fast_path(int64_t power); + static inline constexpr uint64_t max_mantissa_fast_path(); // used when fegetround() == FE_TONEAREST + static inline constexpr int largest_power_of_ten(); + static inline constexpr int smallest_power_of_ten(); + static inline constexpr T exact_power_of_ten(int64_t power); + static inline constexpr size_t max_digits(); + static inline constexpr equiv_uint exponent_mask(); + static inline constexpr equiv_uint mantissa_mask(); + static inline constexpr equiv_uint hidden_bit_mask(); +}; + +template +struct binary_format_lookup_tables { + static constexpr double powers_of_ten[] = { + 1e0, 1e1, 1e2, 1e3, 1e4, 
1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, + 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22}; + + // Largest integer value v so that (5**index * v) <= 1<<53. + // 0x10000000000000 == 1 << 53 + static constexpr uint64_t max_mantissa[] = { 0x10000000000000, 0x10000000000000 / 5, 0x10000000000000 / (5 * 5), @@ -286,44 +392,42 @@ constexpr static uint64_t max_mantissa_double[] = { 0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * constant_55555 * 5 * 5), 0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * constant_55555 * 5 * 5 * 5), 0x10000000000000 / (constant_55555 * constant_55555 * constant_55555 * constant_55555 * 5 * 5 * 5 * 5)}; +}; + +template +constexpr double binary_format_lookup_tables::powers_of_ten[]; + +template +constexpr uint64_t binary_format_lookup_tables::max_mantissa[]; + +template +struct binary_format_lookup_tables { + static constexpr float powers_of_ten[] = {1e0f, 1e1f, 1e2f, 1e3f, 1e4f, 1e5f, + 1e6f, 1e7f, 1e8f, 1e9f, 1e10f}; + // Largest integer value v so that (5**index * v) <= 1<<24. 
// 0x1000000 == 1<<24 - constexpr static uint64_t max_mantissa_float[] = { - 0x1000000, - 0x1000000 / 5, - 0x1000000 / (5 * 5), - 0x1000000 / (5 * 5 * 5), - 0x1000000 / (5 * 5 * 5 * 5), - 0x1000000 / (constant_55555), - 0x1000000 / (constant_55555 * 5), - 0x1000000 / (constant_55555 * 5 * 5), - 0x1000000 / (constant_55555 * 5 * 5 * 5), - 0x1000000 / (constant_55555 * 5 * 5 * 5 * 5), - 0x1000000 / (constant_55555 * constant_55555), - 0x1000000 / (constant_55555 * constant_55555 * 5)}; - -template struct binary_format { - using equiv_uint = typename std::conditional::type; - - static inline constexpr int mantissa_explicit_bits(); - static inline constexpr int minimum_exponent(); - static inline constexpr int infinite_power(); - static inline constexpr int sign_index(); - static inline constexpr int min_exponent_fast_path(); // used when fegetround() == FE_TONEAREST - static inline constexpr int max_exponent_fast_path(); - static inline constexpr int max_exponent_round_to_even(); - static inline constexpr int min_exponent_round_to_even(); - static inline constexpr uint64_t max_mantissa_fast_path(int64_t power); - static inline constexpr uint64_t max_mantissa_fast_path(); // used when fegetround() == FE_TONEAREST - static inline constexpr int largest_power_of_ten(); - static inline constexpr int smallest_power_of_ten(); - static inline constexpr T exact_power_of_ten(int64_t power); - static inline constexpr size_t max_digits(); - static inline constexpr equiv_uint exponent_mask(); - static inline constexpr equiv_uint mantissa_mask(); - static inline constexpr equiv_uint hidden_bit_mask(); + static constexpr uint64_t max_mantissa[] = { + 0x1000000, + 0x1000000 / 5, + 0x1000000 / (5 * 5), + 0x1000000 / (5 * 5 * 5), + 0x1000000 / (5 * 5 * 5 * 5), + 0x1000000 / (constant_55555), + 0x1000000 / (constant_55555 * 5), + 0x1000000 / (constant_55555 * 5 * 5), + 0x1000000 / (constant_55555 * 5 * 5 * 5), + 0x1000000 / (constant_55555 * 5 * 5 * 5 * 5), + 0x1000000 / (constant_55555 
* constant_55555), + 0x1000000 / (constant_55555 * constant_55555 * 5)}; }; +template +constexpr float binary_format_lookup_tables::powers_of_ten[]; + +template +constexpr uint64_t binary_format_lookup_tables::max_mantissa[]; + template <> inline constexpr int binary_format::min_exponent_fast_path() { #if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0) return 0; @@ -386,6 +490,7 @@ template <> inline constexpr int binary_format::max_exponent_fast_path() template <> inline constexpr int binary_format::max_exponent_fast_path() { return 10; } + template <> inline constexpr uint64_t binary_format::max_mantissa_fast_path() { return uint64_t(2) << mantissa_explicit_bits(); } @@ -393,7 +498,8 @@ template <> inline constexpr uint64_t binary_format::max_mantissa_fast_p // caller is responsible to ensure that // power >= 0 && power <= 22 // - return max_mantissa_double[power]; + // Work around clang bug https://godbolt.org/z/zedh7rrhc + return (void)max_mantissa[0], max_mantissa[power]; } template <> inline constexpr uint64_t binary_format::max_mantissa_fast_path() { return uint64_t(2) << mantissa_explicit_bits(); @@ -402,17 +508,19 @@ template <> inline constexpr uint64_t binary_format::max_mantissa_fast_pa // caller is responsible to ensure that // power >= 0 && power <= 10 // - return max_mantissa_float[power]; + // Work around clang bug https://godbolt.org/z/zedh7rrhc + return (void)max_mantissa[0], max_mantissa[power]; } template <> inline constexpr double binary_format::exact_power_of_ten(int64_t power) { - return powers_of_ten_double[power]; + // Work around clang bug https://godbolt.org/z/zedh7rrhc + return (void)powers_of_ten[0], powers_of_ten[power]; } template <> inline constexpr float binary_format::exact_power_of_ten(int64_t power) { - - return powers_of_ten_float[power]; + // Work around clang bug https://godbolt.org/z/zedh7rrhc + return (void)powers_of_ten[0], powers_of_ten[power]; } @@ -471,10 +579,10 @@ template <> inline constexpr binary_format::equiv_uint 
template fastfloat_really_inline FASTFLOAT_CONSTEXPR20 void to_float(bool negative, adjusted_mantissa am, T &value) { - using uint = typename binary_format::equiv_uint; - uint word = (uint)am.mantissa; - word |= uint(am.power2) << binary_format::mantissa_explicit_bits(); - word |= uint(negative) << binary_format::sign_index(); + using fastfloat_uint = typename binary_format::equiv_uint; + fastfloat_uint word = (fastfloat_uint)am.mantissa; + word |= fastfloat_uint(am.power2) << binary_format::mantissa_explicit_bits(); + word |= fastfloat_uint(negative) << binary_format::sign_index(); #if FASTFLOAT_HAS_BIT_CAST value = std::bit_cast(word); #else diff --git a/third_party/fast_float/parse_number.h b/third_party/fast_float/parse_number.h index 726d76167784..a011a8cbf4df 100644 --- a/third_party/fast_float/parse_number.h +++ b/third_party/fast_float/parse_number.h @@ -4,6 +4,7 @@ #include "ascii_number.h" #include "decimal_to_binary.h" #include "digit_comparison.h" +#include "float_common.h" #include #include @@ -163,8 +164,15 @@ from_chars_result_t from_chars_advanced(UC const * first, UC const * last, } parsed_number_string_t pns = parse_number_string(first, last, options); if (!pns.valid) { - return detail::parse_infnan(first, last, value); + if (options.format & chars_format::no_infnan) { + answer.ec = std::errc::invalid_argument; + answer.ptr = first; + return answer; + } else { + return detail::parse_infnan(first, last, value); + } } + answer.ec = std::errc(); // be optimistic answer.ptr = pns.lastmatch; // The implementation of the Clinger's fast path is convoluted because