pingcap · ti-chi-bot · Dec 5, 2024 · Dec 4, 2024 · Dec 4, 2024 · Dec 5, 2024
diff --git a/dbms/src/Functions/FunctionsTiDBConversion.h b/dbms/src/Functions/FunctionsTiDBConversion.h
@@ -65,6 +65,8 @@
 namespace DB
 {
 String trim(const StringRef & value);
+template <typename T>
+void writeFloatTextNoExp(T x, WriteBuffer & buf);
 
 enum CastError
 {
@@ -244,7 +246,14 @@ struct TiDBConvertToString
             for (size_t i = 0; i < size; ++i)
             {
                 WriteBufferFromVector<ColumnString::Chars_t> element_write_buffer(container_per_element);
-                FormatImpl<FromDataType>::execute(vec_from[i], element_write_buffer, &type, nullptr);
+                if constexpr (std::is_floating_point_v<FromFieldType>)
+                {
+                    writeFloatTextNoExp(vec_from[i], element_write_buffer);
+                }
+                else
+                {
+                    FormatImpl<FromDataType>::execute(vec_from[i], element_write_buffer, &type, nullptr);
+                }
                 size_t byte_length = element_write_buffer.count();
                 if (tp.flen() >= 0)
                     byte_length = std::min(byte_length, tp.flen());

diff --git a/dbms/src/Functions/tests/gtest_tidb_conversion.cpp b/dbms/src/Functions/tests/gtest_tidb_conversion.cpp
@@ -26,11 +26,48 @@
 #include <common/types.h>
 #include <gtest/gtest.h>
 
-#include <limits>
+namespace DB
+{
+template <typename T>
+void writeFloatTextNoExp(T x, WriteBuffer & buf);
+}
+
 namespace DB::tests
 {
 namespace
 {
+
+template <typename T>
+std::string formatFloat(const T x)
+{
+    std::string res;
+    WriteBufferFromString buf(res);
+    writeFloatTextNoExp(x, buf);
+    res.resize(buf.count());
+    return res;
+}
+
+String genFloatStr(std::string_view val, int zero_n)
+{
+    assert(zero_n > 0);
+
+    String s;
+    s.resize(val.size() + zero_n, '0');
+    std::memcpy(s.data(), val.data(), val.size());
+    return s;
+}
+
+String genFloatStr(int zero_n, std::string_view val)
+{
+    assert(zero_n > 0);
+
+    String s;
+    s.resize(val.size() + zero_n + 1, '0');
+    s[1] = '.';
+    std::memcpy(s.data() + zero_n + 1, val.data(), val.size());
+    return s;
+}
+
 auto getDatetimeColumn(bool single_field = false)
 {
     MyDateTime datetime(2021, 10, 26, 16, 8, 59, 0);
@@ -302,7 +339,7 @@ class TestTidbConversion : public DB::tests::FunctionTest
         ASSERT_TRUE(!input_decimal.empty());
         size_t prec = input_decimal.length();
         size_t scale = 0;
-        auto pos = input_decimal.find(".");
+        auto pos = input_decimal.find('.');
         if (pos != std::string::npos)
         {
             ASSERT_TRUE(input_decimal.length() >= pos + 1);
@@ -1086,34 +1123,52 @@ CATCH
 TEST_F(TestTidbConversion, castRealAsString)
 try
 {
+    const String str_max_float32 = genFloatStr("34028235", 31);
+    const String str_min_float32 = genFloatStr(38, "11754944");
+    const String str_max_float64 = genFloatStr("17976931348623157", 292);
+    const String str_min_float64 = genFloatStr(308, "22250738585072014");
+
+    ASSERT_EQ(formatFloat(MAX_FLOAT32), str_max_float32);
+    ASSERT_EQ(formatFloat(MIN_FLOAT32), str_min_float32);
+    ASSERT_EQ(formatFloat(-MAX_FLOAT32), "-" + str_max_float32);
+    ASSERT_EQ(formatFloat(-MIN_FLOAT32), "-" + str_min_float32);
+    ASSERT_EQ(formatFloat(std::numeric_limits<Float32>::infinity()), "+Inf");
+    ASSERT_EQ(formatFloat(-std::numeric_limits<Float32>::infinity()), "-Inf");
+    ASSERT_EQ(formatFloat(-std::numeric_limits<Float32>::quiet_NaN()), "NaN");
+
+    ASSERT_EQ(formatFloat(MAX_FLOAT64), str_max_float64);
+    ASSERT_EQ(formatFloat(MIN_FLOAT64), str_min_float64);
+    ASSERT_EQ(formatFloat(-MAX_FLOAT64), "-" + str_max_float64);
+    ASSERT_EQ(formatFloat(-MIN_FLOAT64), "-" + str_min_float64);
+    ASSERT_EQ(formatFloat(std::numeric_limits<Float64>::infinity()), "+Inf");
+    ASSERT_EQ(formatFloat(-std::numeric_limits<Float64>::infinity()), "-Inf");
+    ASSERT_EQ(formatFloat(-std::numeric_limits<Float64>::quiet_NaN()), "NaN");
+
     testOnlyNull<Float32, String>();
     testOnlyNull<Float64, String>();
 
-    // TODO add tests after non-expected results fixed
-
     testNotOnlyNull<Float32, String>(0, "0");
     testNotOnlyNull<Float32, String>(12.213, "12.213");
     testNotOnlyNull<Float32, String>(-12.213, "-12.213");
-    // tiflash: 3.4028235e38
-    // tidb: 340282350000000000000000000000000000000
-    // mysql: 3.40282e38
-    // testNotOnlyNull<Float32, String>(MAX_FLOAT32, "3.4028235e38");
-    // tiflash: 1.1754944e-38
-    // tidb: 0.000000000000000000000000000000000000011754944
-    // mysql: 1.17549e-38
-    // testNotOnlyNull<Float32, String>(MIN_FLOAT32, "1.1754944e-38");
-
     testNotOnlyNull<Float64, String>(0, "0");
     testNotOnlyNull<Float64, String>(12.213, "12.213");
     testNotOnlyNull<Float64, String>(-12.213, "-12.213");
-    // tiflash: 1.7976931348623157e308
-    // tidb: 179769313486231570000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
-    // mysql: 1.7976931348623157e308
-    // testNotOnlyNull<Float64, String>(MAX_FLOAT64, "1.7976931348623157e308");
-    // tiflash: 2.2250738585072014e-308
-    // tidb: 0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000022250738585072014
-    // mysql: 2.2250738585072014e-308
-    // testNotOnlyNull<Float64, String>(MIN_FLOAT64, "2.2250738585072014e-308");
+
+    testNotOnlyNull<Float32, String>(MAX_FLOAT32, str_max_float32);
+    testNotOnlyNull<Float32, String>(MIN_FLOAT32, str_min_float32);
+    testNotOnlyNull<Float32, String>(-MAX_FLOAT32, "-" + str_max_float32);
+    testNotOnlyNull<Float32, String>(-MIN_FLOAT32, "-" + str_min_float32);
+    testNotOnlyNull<Float32, String>(std::numeric_limits<Float32>::infinity(), "+Inf");
+    testNotOnlyNull<Float32, String>(-std::numeric_limits<Float32>::infinity(), "-Inf");
+    testNotOnlyNull<Float32, String>(-std::numeric_limits<Float32>::quiet_NaN(), "NaN");
+
+    testNotOnlyNull<Float64, String>(MAX_FLOAT64, str_max_float64);
+    testNotOnlyNull<Float64, String>(MIN_FLOAT64, str_min_float64);
+    testNotOnlyNull<Float64, String>(-MAX_FLOAT64, "-" + str_max_float64);
+    testNotOnlyNull<Float64, String>(-MIN_FLOAT64, "-" + str_min_float64);
+    testNotOnlyNull<Float64, String>(std::numeric_limits<Float64>::infinity(), "+Inf");
+    testNotOnlyNull<Float64, String>(-std::numeric_limits<Float64>::infinity(), "-Inf");
+    testNotOnlyNull<Float64, String>(-std::numeric_limits<Float64>::quiet_NaN(), "NaN");
 }
 CATCH
 

diff --git a/dbms/src/IO/WriteHelpers.cpp b/dbms/src/IO/WriteHelpers.cpp
@@ -16,6 +16,8 @@
 #include <IO/WriteHelpers.h>
 #include <inttypes.h>
 
+#include <charconv>
+
 
 namespace DB
 {
@@ -83,4 +85,167 @@ void writePointerHex(const void * ptr, WriteBuffer & buf)
     buf.write(hex_str, 2 * sizeof(ptr));
 }
 
+template <typename T>
+void writeFloatTextNoExp(T x, WriteBuffer & buf)
+{
+    static_assert(
+        std::is_same_v<T, double> || std::is_same_v<T, float>,
+        "Argument for writeFloatText must be float or double");
+
+    using Converter = DoubleConverter<false>;
+
+    Converter::BufferType buffer;
+    double_conversion::StringBuilder builder{buffer, sizeof(buffer)};
+
+    bool result = false;
+    if constexpr (std::is_same_v<T, double>)
+        result = Converter::instance().ToShortest(x, &builder);
+    else
+        result = Converter::instance().ToShortestSingle(x, &builder);
+
+    if (!result)
+        throw Exception("Cannot print floating point number", ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER);
+
+    constexpr std::string_view nan = "NaN";
+    constexpr std::string_view neg_inf = "-Inf";
+    constexpr std::string_view inf = "+Inf";
+
+    std::string_view sv{buffer, static_cast<size_t>(builder.position())};
+    if (sv == "nan")
+    {
+        buf.write(nan.data(), nan.size());
+        return;
+    }
+    else if (sv == "-inf")
+    {
+        buf.write(neg_inf.data(), neg_inf.size());
+        return;
+    }
+    else if (sv == "inf")
+    {
+        buf.write(inf.data(), inf.size());
+        return;
+    }
+
+    constexpr auto c_neg = '-';
+    constexpr auto c_zero = '0';
+    constexpr auto c_dot = '.';
+    constexpr auto c_exp = 'e';
+
+    bool neg = buffer[0] == c_neg;
+    int bg = 0, ed = builder.position();
+    if (neg)
+    {
+        bg++;
+    }
+
+    // return zero
+    if (ed - bg == 1 && sv[bg] == c_zero)
+    {
+        buf.write(sv.data(), sv.size());
+        return;
+    }
+
+    Int64 exp_pos = sv.find(c_exp);
+    if (exp_pos < 0)
+    {
+        buf.write(sv.data(), sv.size());
+        return;
+    }
+
+    Int64 exp10 = 0;
+    if (exp_pos >= 0)
+    {
+        auto exp_sv = sv.substr(exp_pos + 1);
+        std::from_chars(exp_sv.begin(), exp_sv.end(), exp10);
+        ed = exp_pos;
+    }
+
+    // format: +.++e? or +e?
+    auto int_bg = bg, int_ed = ed, float_bg = ed, float_ed = ed;
+
+    if (const auto begin = sv.data() + bg, end = sv.data() + ed, dot_pos = std::find(begin, end, c_dot); dot_pos != end)
+    {
+        int_ed = dot_pos - sv.data();
+        float_bg = int_ed + 1;
+
+        assert(int_ed - int_bg == 1);
+        assert(float_ed - float_bg > 0);
+    }
+    else
+    {
+        assert(int_ed - int_bg == 1);
+        assert(float_ed - float_bg == 0);
+    }
+
+    assert(sv[int_bg] != c_zero);
+
+    const auto put_char = [&buf](char c) {
+        buf.write(c);
+    };
+    const auto put_zero = [&]() {
+        put_char(c_zero);
+    };
+    const auto put_n_zero = [&](Int64 n) {
+        constexpr int size = 32;
+        const static auto data = ({
+            std::array<char, size> b{};
+            b.fill('0');
+            b;
+        });
+        for (; n >= size; n -= size)
+        {
+            buf.write(data.data(), size);
+        }
+        for (; n; n--)
+        {
+            put_zero();
+        }
+    };
+    const auto put_dot = [&]() {
+        put_char(c_dot);
+    };
+    const auto put_slice = [&buf](std::string_view s) {
+        buf.write(s.data(), s.size());
+    };
+
+    if (neg)
+    {
+        put_char(c_neg);
+    }
+
+    if (exp10 < 0)
+    {
+        exp10 = -exp10;
+        put_zero();
+        put_dot();
+        exp10 -= 1;
+        put_n_zero(exp10);
+        put_slice({sv.data() + int_bg, sv.data() + int_ed});
+        put_slice({sv.data() + float_bg, sv.data() + float_ed});
+    }
+    else
+    {
+        put_slice({sv.data() + int_bg, sv.data() + int_ed});
+
+        if (exp10 < (float_ed - float_bg))
+        {
+            put_slice({sv.data() + float_bg, sv.data() + float_bg + exp10});
+
+            put_dot();
+            float_bg += exp10;
+            put_slice({sv.data() + float_bg, sv.data() + float_ed});
+        }
+        else
+        {
+            put_slice({sv.data() + float_bg, sv.data() + float_ed});
+            exp10 -= (float_ed - float_bg);
+            put_n_zero(exp10);
+        }
+    }
+}
+
+template void writeFloatTextNoExp<Float64>(Float64 x, WriteBuffer & buf);
+template void writeFloatTextNoExp<Float32>(Float32 x, WriteBuffer & buf);
+
 } // namespace DB