diff --git a/dbms/src/Functions/FunctionsTiDBConversion.h b/dbms/src/Functions/FunctionsTiDBConversion.h index 52ad9f42d36..75f19ea0e3d 100644 --- a/dbms/src/Functions/FunctionsTiDBConversion.h +++ b/dbms/src/Functions/FunctionsTiDBConversion.h @@ -65,6 +65,8 @@ namespace DB { String trim(const StringRef & value); +template +void writeFloatTextNoExp(T x, WriteBuffer & buf); enum CastError { @@ -244,7 +246,14 @@ struct TiDBConvertToString for (size_t i = 0; i < size; ++i) { WriteBufferFromVector element_write_buffer(container_per_element); - FormatImpl::execute(vec_from[i], element_write_buffer, &type, nullptr); + if constexpr (std::is_floating_point_v) + { + writeFloatTextNoExp(vec_from[i], element_write_buffer); + } + else + { + FormatImpl::execute(vec_from[i], element_write_buffer, &type, nullptr); + } size_t byte_length = element_write_buffer.count(); if (tp.flen() >= 0) byte_length = std::min(byte_length, tp.flen()); diff --git a/dbms/src/Functions/tests/gtest_tidb_conversion.cpp b/dbms/src/Functions/tests/gtest_tidb_conversion.cpp index a342c836429..fa853f1c309 100644 --- a/dbms/src/Functions/tests/gtest_tidb_conversion.cpp +++ b/dbms/src/Functions/tests/gtest_tidb_conversion.cpp @@ -26,11 +26,48 @@ #include #include -#include +namespace DB +{ +template +void writeFloatTextNoExp(T x, WriteBuffer & buf); +} + namespace DB::tests { namespace { + +template +std::string formatFloat(const T x) +{ + std::string res; + WriteBufferFromString buf(res); + writeFloatTextNoExp(x, buf); + res.resize(buf.count()); + return res; +} + +String genFloatStr(std::string_view val, int zero_n) +{ + assert(zero_n > 0); + + String s; + s.resize(val.size() + zero_n, '0'); + std::memcpy(s.data(), val.data(), val.size()); + return s; +} + +String genFloatStr(int zero_n, std::string_view val) +{ + assert(zero_n > 0); + + String s; + s.resize(val.size() + zero_n + 1, '0'); + s[1] = '.'; + std::memcpy(s.data() + zero_n + 1, val.data(), val.size()); + return s; +} + auto 
getDatetimeColumn(bool single_field = false) { MyDateTime datetime(2021, 10, 26, 16, 8, 59, 0); @@ -302,7 +339,7 @@ class TestTidbConversion : public DB::tests::FunctionTest ASSERT_TRUE(!input_decimal.empty()); size_t prec = input_decimal.length(); size_t scale = 0; - auto pos = input_decimal.find("."); + auto pos = input_decimal.find('.'); if (pos != std::string::npos) { ASSERT_TRUE(input_decimal.length() >= pos + 1); @@ -1086,34 +1123,52 @@ CATCH TEST_F(TestTidbConversion, castRealAsString) try { + const String str_max_float32 = genFloatStr("34028235", 31); + const String str_min_float32 = genFloatStr(38, "11754944"); + const String str_max_float64 = genFloatStr("17976931348623157", 292); + const String str_min_float64 = genFloatStr(308, "22250738585072014"); + + ASSERT_EQ(formatFloat(MAX_FLOAT32), str_max_float32); + ASSERT_EQ(formatFloat(MIN_FLOAT32), str_min_float32); + ASSERT_EQ(formatFloat(-MAX_FLOAT32), "-" + str_max_float32); + ASSERT_EQ(formatFloat(-MIN_FLOAT32), "-" + str_min_float32); + ASSERT_EQ(formatFloat(std::numeric_limits::infinity()), "+Inf"); + ASSERT_EQ(formatFloat(-std::numeric_limits::infinity()), "-Inf"); + ASSERT_EQ(formatFloat(-std::numeric_limits::quiet_NaN()), "NaN"); + + ASSERT_EQ(formatFloat(MAX_FLOAT64), str_max_float64); + ASSERT_EQ(formatFloat(MIN_FLOAT64), str_min_float64); + ASSERT_EQ(formatFloat(-MAX_FLOAT64), "-" + str_max_float64); + ASSERT_EQ(formatFloat(-MIN_FLOAT64), "-" + str_min_float64); + ASSERT_EQ(formatFloat(std::numeric_limits::infinity()), "+Inf"); + ASSERT_EQ(formatFloat(-std::numeric_limits::infinity()), "-Inf"); + ASSERT_EQ(formatFloat(-std::numeric_limits::quiet_NaN()), "NaN"); + testOnlyNull(); testOnlyNull(); - // TODO add tests after non-expected results fixed - testNotOnlyNull(0, "0"); testNotOnlyNull(12.213, "12.213"); testNotOnlyNull(-12.213, "-12.213"); - // tiflash: 3.4028235e38 - // tidb: 340282350000000000000000000000000000000 - // mysql: 3.40282e38 - // testNotOnlyNull(MAX_FLOAT32, "3.4028235e38"); - 
// tiflash: 1.1754944e-38 - // tidb: 0.000000000000000000000000000000000000011754944 - // mysql: 1.17549e-38 - // testNotOnlyNull(MIN_FLOAT32, "1.1754944e-38"); - testNotOnlyNull(0, "0"); testNotOnlyNull(12.213, "12.213"); testNotOnlyNull(-12.213, "-12.213"); - // tiflash: 1.7976931348623157e308 - // tidb: 179769313486231570000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000 - // mysql: 1.7976931348623157e308 - // testNotOnlyNull(MAX_FLOAT64, "1.7976931348623157e308"); - // tiflash: 2.2250738585072014e-308 - // tidb: 0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000022250738585072014 - // mysql: 2.2250738585072014e-308 - // testNotOnlyNull(MIN_FLOAT64, "2.2250738585072014e-308"); + + testNotOnlyNull(MAX_FLOAT32, str_max_float32); + testNotOnlyNull(MIN_FLOAT32, str_min_float32); + testNotOnlyNull(-MAX_FLOAT32, "-" + str_max_float32); + testNotOnlyNull(-MIN_FLOAT32, "-" + str_min_float32); + testNotOnlyNull(std::numeric_limits::infinity(), "+Inf"); + testNotOnlyNull(-std::numeric_limits::infinity(), "-Inf"); + testNotOnlyNull(-std::numeric_limits::quiet_NaN(), "NaN"); + + testNotOnlyNull(MAX_FLOAT64, str_max_float64); + testNotOnlyNull(MIN_FLOAT64, str_min_float64); + testNotOnlyNull(-MAX_FLOAT64, "-" + str_max_float64); + testNotOnlyNull(-MIN_FLOAT64, "-" + str_min_float64); + testNotOnlyNull(std::numeric_limits::infinity(), "+Inf"); + testNotOnlyNull(-std::numeric_limits::infinity(), "-Inf"); + testNotOnlyNull(-std::numeric_limits::quiet_NaN(), "NaN"); } CATCH diff --git 
a/dbms/src/IO/WriteHelpers.cpp b/dbms/src/IO/WriteHelpers.cpp
index 8152483e509..bc1248d1eb1 100644
--- a/dbms/src/IO/WriteHelpers.cpp
+++ b/dbms/src/IO/WriteHelpers.cpp
@@ -16,6 +16,8 @@
 #include <IO/WriteHelpers.h>
 #include <inttypes.h>
 
+#include <charconv>
+
 namespace DB
 {
 
@@ -83,4 +85,132 @@ void writePointerHex(const void * ptr, WriteBuffer & buf)
     buf.write(hex_str, 2 * sizeof(ptr));
 }
 
+/// Writes `x` as plain decimal text, never in scientific notation.
+///
+/// The shortest representation is produced first; when it carries an
+/// exponent ("d[.ddd]e[-]NN"), the digits are re-emitted with the decimal point
+/// moved and padded with zeros, e.g. "3.4028235e38" becomes "34028235" followed
+/// by 31 zeros. NaN is written as "NaN", infinities as "+Inf" / "-Inf".
+///
+/// Throws CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER when the number cannot be converted
+/// or the exponent of the intermediate text cannot be parsed.
+template <typename T>
+void writeFloatTextNoExp(T x, WriteBuffer & buf)
+{
+    static_assert(
+        std::is_same_v<T, double> || std::is_same_v<T, float>,
+        "Argument for writeFloatTextNoExp must be float or double");
+
+    using Converter = DoubleConverter<false>;
+
+    Converter::BufferType buffer;
+    double_conversion::StringBuilder builder{buffer, sizeof(buffer)};
+
+    bool result = false;
+    if constexpr (std::is_same_v<T, double>)
+        result = Converter::instance().ToShortest(x, &builder);
+    else
+        result = Converter::instance().ToShortestSingle(x, &builder);
+
+    if (!result)
+        throw Exception("Cannot print floating point number", ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER);
+
+    const std::string_view sv(buffer, builder.position());
+    const auto put_slice = [&buf](std::string_view s) {
+        buf.write(s.data(), s.size());
+    };
+
+    // Special values use their own spelling instead of the converter's.
+    if (sv == "nan")
+    {
+        put_slice("NaN");
+        return;
+    }
+    if (sv == "-inf")
+    {
+        put_slice("-Inf");
+        return;
+    }
+    if (sv == "inf")
+    {
+        put_slice("+Inf");
+        return;
+    }
+
+    // No exponent (this includes "0" and "-0"): the text is already plain decimal.
+    const auto exp_pos = sv.find('e');
+    if (exp_pos == std::string_view::npos)
+    {
+        put_slice(sv);
+        return;
+    }
+
+    // Parse the decimal exponent. std::from_chars rejects a leading '+', so skip it,
+    // and work on raw pointers because string_view iterators need not be pointers.
+    Int64 exp10 = 0;
+    {
+        const char * exp_begin = sv.data() + exp_pos + 1;
+        const char * const exp_end = sv.data() + sv.size();
+        if (exp_begin != exp_end && *exp_begin == '+')
+            ++exp_begin;
+        const auto parsed = std::from_chars(exp_begin, exp_end, exp10);
+        if (parsed.ec != std::errc() || parsed.ptr != exp_end)
+            throw Exception("Cannot print floating point number", ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER);
+    }
+
+    // Split the mantissa "[-]d[.ddd]" into its integer and fractional digits.
+    const bool neg = sv.front() == '-';
+    const std::string_view mantissa = sv.substr(neg ? 1 : 0, exp_pos - (neg ? 1 : 0));
+    const auto dot_pos = mantissa.find('.');
+    const std::string_view int_digits = mantissa.substr(0, dot_pos);
+    const std::string_view frac_digits
+        = dot_pos == std::string_view::npos ? std::string_view{} : mantissa.substr(dot_pos + 1);
+
+    // The exponent form is expected to have exactly one non-zero leading digit.
+    assert(int_digits.size() == 1 && int_digits.front() != '0');
+
+    const auto put_zeros = [&buf](Int64 n) {
+        static constexpr std::string_view zero_block = "00000000000000000000000000000000";
+        constexpr auto block_size = static_cast<Int64>(zero_block.size());
+        while (n > 0)
+        {
+            const Int64 chunk = n < block_size ? n : block_size;
+            buf.write(zero_block.data(), chunk);
+            n -= chunk;
+        }
+    };
+
+    if (neg)
+        buf.write('-');
+
+    if (exp10 < 0)
+    {
+        // d.ddde-N  ->  "0." + (N - 1) zeros + all digits
+        put_slice("0.");
+        put_zeros(-exp10 - 1);
+        put_slice(int_digits);
+        put_slice(frac_digits);
+        return;
+    }
+
+    put_slice(int_digits);
+    const auto frac_len = static_cast<Int64>(frac_digits.size());
+    if (exp10 < frac_len)
+    {
+        // The decimal point lands inside the fractional digits.
+        put_slice(frac_digits.substr(0, exp10));
+        put_slice(".");
+        put_slice(frac_digits.substr(exp10));
+    }
+    else
+    {
+        // All digits belong to the integer part; pad with trailing zeros.
+        put_slice(frac_digits);
+        put_zeros(exp10 - frac_len);
+    }
+}
+
+template void writeFloatTextNoExp(Float64 x, WriteBuffer & buf);
+template void writeFloatTextNoExp(Float32 x, WriteBuffer & buf);
+
 } // namespace DB