Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix incompatible behavior with TiDB when casting float into string #9696

Merged
merged 4 commits into from
Dec 5, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion dbms/src/Functions/FunctionsTiDBConversion.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@
namespace DB
{
String trim(const StringRef & value);
template <typename T>
void writeFloatTextNoExp(T x, WriteBuffer & buf);

enum CastError
{
Expand Down Expand Up @@ -244,7 +246,14 @@ struct TiDBConvertToString
for (size_t i = 0; i < size; ++i)
{
WriteBufferFromVector<ColumnString::Chars_t> element_write_buffer(container_per_element);
FormatImpl<FromDataType>::execute(vec_from[i], element_write_buffer, &type, nullptr);
if constexpr (std::is_floating_point_v<FromFieldType>)
{
writeFloatTextNoExp(vec_from[i], element_write_buffer);
}
else
{
FormatImpl<FromDataType>::execute(vec_from[i], element_write_buffer, &type, nullptr);
}
size_t byte_length = element_write_buffer.count();
if (tp.flen() >= 0)
byte_length = std::min(byte_length, tp.flen());
Expand Down
97 changes: 76 additions & 21 deletions dbms/src/Functions/tests/gtest_tidb_conversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,48 @@
#include <common/types.h>
#include <gtest/gtest.h>

#include <limits>
namespace DB
{
template <typename T>
void writeFloatTextNoExp(T x, WriteBuffer & buf);
}

namespace DB::tests
{
namespace
{

template <typename T>
std::string formatFloat(const T x)
{
std::string res;
WriteBufferFromString buf(res);
writeFloatTextNoExp(x, buf);
res.resize(buf.count());
return res;
}

String genFloatStr(std::string_view val, int zero_n)
{
assert(zero_n > 0);

String s;
s.resize(val.size() + zero_n, '0');
std::memcpy(s.data(), val.data(), val.size());
return s;
}

String genFloatStr(int zero_n, std::string_view val)
{
assert(zero_n > 0);

String s;
s.resize(val.size() + zero_n + 1, '0');
s[1] = '.';
std::memcpy(s.data() + zero_n + 1, val.data(), val.size());
return s;
}

auto getDatetimeColumn(bool single_field = false)
{
MyDateTime datetime(2021, 10, 26, 16, 8, 59, 0);
Expand Down Expand Up @@ -302,7 +339,7 @@ class TestTidbConversion : public DB::tests::FunctionTest
ASSERT_TRUE(!input_decimal.empty());
size_t prec = input_decimal.length();
size_t scale = 0;
auto pos = input_decimal.find(".");
auto pos = input_decimal.find('.');
if (pos != std::string::npos)
{
ASSERT_TRUE(input_decimal.length() >= pos + 1);
Expand Down Expand Up @@ -1086,34 +1123,52 @@ CATCH
TEST_F(TestTidbConversion, castRealAsString)
try
{
const String str_max_float32 = genFloatStr("34028235", 31);
const String str_min_float32 = genFloatStr(38, "11754944");
const String str_max_float64 = genFloatStr("17976931348623157", 292);
const String str_min_float64 = genFloatStr(308, "22250738585072014");

ASSERT_EQ(formatFloat(MAX_FLOAT32), str_max_float32);
ASSERT_EQ(formatFloat(MIN_FLOAT32), str_min_float32);
ASSERT_EQ(formatFloat(-MAX_FLOAT32), "-" + str_max_float32);
ASSERT_EQ(formatFloat(-MIN_FLOAT32), "-" + str_min_float32);
ASSERT_EQ(formatFloat(std::numeric_limits<Float32>::infinity()), "+Inf");
ASSERT_EQ(formatFloat(-std::numeric_limits<Float32>::infinity()), "-Inf");
ASSERT_EQ(formatFloat(-std::numeric_limits<Float32>::quiet_NaN()), "NaN");

ASSERT_EQ(formatFloat(MAX_FLOAT64), str_max_float64);
ASSERT_EQ(formatFloat(MIN_FLOAT64), str_min_float64);
ASSERT_EQ(formatFloat(-MAX_FLOAT64), "-" + str_max_float64);
ASSERT_EQ(formatFloat(-MIN_FLOAT64), "-" + str_min_float64);
ASSERT_EQ(formatFloat(std::numeric_limits<Float64>::infinity()), "+Inf");
ASSERT_EQ(formatFloat(-std::numeric_limits<Float64>::infinity()), "-Inf");
ASSERT_EQ(formatFloat(-std::numeric_limits<Float64>::quiet_NaN()), "NaN");

testOnlyNull<Float32, String>();
testOnlyNull<Float64, String>();

// TODO add tests after non-expected results fixed

testNotOnlyNull<Float32, String>(0, "0");
testNotOnlyNull<Float32, String>(12.213, "12.213");
testNotOnlyNull<Float32, String>(-12.213, "-12.213");
// tiflash: 3.4028235e38
// tidb: 340282350000000000000000000000000000000
// mysql: 3.40282e38
// testNotOnlyNull<Float32, String>(MAX_FLOAT32, "3.4028235e38");
// tiflash: 1.1754944e-38
// tidb: 0.000000000000000000000000000000000000011754944
// mysql: 1.17549e-38
// testNotOnlyNull<Float32, String>(MIN_FLOAT32, "1.1754944e-38");

testNotOnlyNull<Float64, String>(0, "0");
testNotOnlyNull<Float64, String>(12.213, "12.213");
testNotOnlyNull<Float64, String>(-12.213, "-12.213");
// tiflash: 1.7976931348623157e308
// tidb: 179769313486231570000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000
// mysql: 1.7976931348623157e308
// testNotOnlyNull<Float64, String>(MAX_FLOAT64, "1.7976931348623157e308");
// tiflash: 2.2250738585072014e-308
// tidb: 0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000022250738585072014
// mysql: 2.2250738585072014e-308
// testNotOnlyNull<Float64, String>(MIN_FLOAT64, "2.2250738585072014e-308");

testNotOnlyNull<Float32, String>(MAX_FLOAT32, str_max_float32);
testNotOnlyNull<Float32, String>(MIN_FLOAT32, str_min_float32);
testNotOnlyNull<Float32, String>(-MAX_FLOAT32, "-" + str_max_float32);
testNotOnlyNull<Float32, String>(-MIN_FLOAT32, "-" + str_min_float32);
testNotOnlyNull<Float32, String>(std::numeric_limits<Float32>::infinity(), "+Inf");
testNotOnlyNull<Float32, String>(-std::numeric_limits<Float32>::infinity(), "-Inf");
testNotOnlyNull<Float32, String>(-std::numeric_limits<Float32>::quiet_NaN(), "NaN");

testNotOnlyNull<Float64, String>(MAX_FLOAT64, str_max_float64);
testNotOnlyNull<Float64, String>(MIN_FLOAT64, str_min_float64);
testNotOnlyNull<Float64, String>(-MAX_FLOAT64, "-" + str_max_float64);
testNotOnlyNull<Float64, String>(-MIN_FLOAT64, "-" + str_min_float64);
testNotOnlyNull<Float64, String>(std::numeric_limits<Float64>::infinity(), "+Inf");
testNotOnlyNull<Float64, String>(-std::numeric_limits<Float64>::infinity(), "-Inf");
testNotOnlyNull<Float64, String>(-std::numeric_limits<Float64>::quiet_NaN(), "NaN");
}
CATCH

Expand Down
165 changes: 165 additions & 0 deletions dbms/src/IO/WriteHelpers.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
#include <IO/WriteHelpers.h>
#include <inttypes.h>

#include <charconv>


namespace DB
{
Expand Down Expand Up @@ -83,4 +85,167 @@ void writePointerHex(const void * ptr, WriteBuffer & buf)
buf.write(hex_str, 2 * sizeof(ptr));
}

template <typename T>
void writeFloatTextNoExp(T x, WriteBuffer & buf)
{
static_assert(
std::is_same_v<T, double> || std::is_same_v<T, float>,
"Argument for writeFloatText must be float or double");

using Converter = DoubleConverter<false>;

Converter::BufferType buffer;
double_conversion::StringBuilder builder{buffer, sizeof(buffer)};

bool result = false;
if constexpr (std::is_same_v<T, double>)
result = Converter::instance().ToShortest(x, &builder);
else
result = Converter::instance().ToShortestSingle(x, &builder);

if (!result)
throw Exception("Cannot print floating point number", ErrorCodes::CANNOT_PRINT_FLOAT_OR_DOUBLE_NUMBER);

constexpr std::string_view nan = "NaN";
constexpr std::string_view neg_inf = "-Inf";
constexpr std::string_view inf = "+Inf";

std::string_view sv{buffer, static_cast<size_t>(builder.position())};
if (sv == "nan")
{
buf.write(nan.data(), nan.size());
return;
}
else if (sv == "-inf")
{
buf.write(neg_inf.data(), neg_inf.size());
return;
}
else if (sv == "inf")
{
buf.write(inf.data(), inf.size());
return;
}

constexpr auto c_neg = '-';
constexpr auto c_zero = '0';
constexpr auto c_dot = '.';
constexpr auto c_exp = 'e';

bool neg = buffer[0] == c_neg;
int bg = 0, ed = builder.position();
if (neg)
{
bg++;
}

// return zero
if (ed - bg == 1 && sv[bg] == c_zero)
{
buf.write(sv.data(), sv.size());
return;
}

Int64 exp_pos = sv.find(c_exp);
if (exp_pos < 0)
{
buf.write(sv.data(), sv.size());
return;
}

Int64 exp10 = 0;
if (exp_pos >= 0)
solotzg marked this conversation as resolved.
Show resolved Hide resolved
{
auto exp_sv = sv.substr(exp_pos + 1);
std::from_chars(exp_sv.begin(), exp_sv.end(), exp10);
ed = exp_pos;
}

// format: +.++e? or +e?
auto int_bg = bg, int_ed = ed, float_bg = ed, float_ed = ed;

if (const auto begin = sv.data() + bg, end = sv.data() + ed, dot_pos = std::find(begin, end, c_dot); dot_pos != end)
{
int_ed = dot_pos - sv.data();
float_bg = int_ed + 1;

assert(int_ed - int_bg == 1);
assert(float_ed - float_bg > 0);
}
else
{
assert(int_ed - int_bg == 1);
assert(float_ed - float_bg == 0);
}

assert(sv[int_bg] != c_zero);

const auto put_char = [&buf](char c) {
buf.write(c);
};
const auto put_zero = [&]() {
put_char(c_zero);
};
const auto put_n_zero = [&](Int64 n) {
constexpr int size = 32;
const static auto data = ({
std::array<char, size> b{};
b.fill('0');
b;
});
for (; n >= size; n -= size)
{
buf.write(data.data(), size);
}
for (; n; n--)
{
put_zero();
}
};
const auto put_dot = [&]() {
put_char(c_dot);
};
const auto put_slice = [&buf](std::string_view s) {
buf.write(s.data(), s.size());
};

if (neg)
{
put_char(c_neg);
}

if (exp10 < 0)
{
exp10 = -exp10;
put_zero();
put_dot();
exp10 -= 1;
put_n_zero(exp10);
put_slice({sv.data() + int_bg, sv.data() + int_ed});
put_slice({sv.data() + float_bg, sv.data() + float_ed});
}
else
{
put_slice({sv.data() + int_bg, sv.data() + int_ed});

if (exp10 < (float_ed - float_bg))
{
put_slice({sv.data() + float_bg, sv.data() + float_bg + exp10});

put_dot();
float_bg += exp10;
put_slice({sv.data() + float_bg, sv.data() + float_ed});
}
else
{
put_slice({sv.data() + float_bg, sv.data() + float_ed});
exp10 -= (float_ed - float_bg);
put_n_zero(exp10);
}
}
}

template void writeFloatTextNoExp<Float64>(Float64 x, WriteBuffer & buf);
template void writeFloatTextNoExp<Float32>(Float32 x, WriteBuffer & buf);

} // namespace DB