Skip to content

Commit

Permalink
support cast as json (#8333)
Browse files Browse the repository at this point in the history
close #8307, close #8371
  • Loading branch information
SeaRise authored Nov 27, 2023
1 parent a42d69f commit d1b149e
Show file tree
Hide file tree
Showing 22 changed files with 2,544 additions and 127 deletions.
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -140,3 +140,6 @@
[submodule "contrib/qpl"]
path = contrib/qpl
url = https://github.com/intel/qpl.git
[submodule "contrib/simdjson"]
path = contrib/simdjson
url = https://github.com/simdjson/simdjson
2 changes: 2 additions & 0 deletions contrib/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -187,3 +187,5 @@ endif ()
add_subdirectory(magic_enum)

add_subdirectory(aws-cmake)

add_subdirectory(simdjson)
1 change: 1 addition & 0 deletions contrib/simdjson
Submodule simdjson added at 17cb45
1 change: 1 addition & 0 deletions dbms/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ target_link_libraries (tiflash_common_io
prometheus-cpp::pull
cpptoml
magic_enum
simdjson
libsymbolization
${RE2_LIBRARY}
${RE2_ST_LIBRARY}
Expand Down
124 changes: 124 additions & 0 deletions dbms/src/Common/VectorWriter.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
// Copyright 2023 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <common/likely.h>
#include <common/types.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstring>

namespace DB
{
/// Sequentially writes raw bytes into a caller-owned, contiguous container of
/// byte-sized elements (anything exposing `data()`, `size()` and `resize()`),
/// enlarging the container on demand.
///
/// While the writer is alive the container may be larger than the data written
/// so far; the destructor resizes it to exactly `count()` elements. The writer
/// caches raw pointers into the container, so the container must outlive the
/// writer and must not be resized by anyone else in the meantime. Pass the
/// writer by reference: every copy would resize the container again when it is
/// destroyed.
template <typename VectorType>
class VectorWriter
{
    static_assert(sizeof(typename VectorType::value_type) == sizeof(char));

public:
    using Position = char *;

    /// Binds to `vector_` and makes sure it holds at least `initial_size`
    /// elements. Writing starts at offset 0, i.e. over any existing contents.
    explicit VectorWriter(VectorType & vector_, size_t initial_size = 16)
        : vector(vector_)
    {
        if (vector.size() < initial_size)
            vector.resize(initial_size);
        pos = reinterpret_cast<Position>(vector.data());
        end = reinterpret_cast<Position>(vector.data() + vector.size());
    }

    /// Appends a single byte at the current position.
    void write(char x)
    {
        reserveForNextSize(1);
        *pos = x;
        ++pos;
    }

    /// Appends `n` bytes starting at `from`. A zero-length write is a no-op
    /// and never touches `from`.
    void write(const char * from, size_t n)
    {
        if (unlikely(n == 0))
            return;
        reserveForNextSize(n);
        std::memcpy(pos, from, n);
        pos += n;
    }

    /// Moves the write position to `new_offset` (counted from the start of the
    /// container), growing the container first when the offset lies beyond its
    /// current size.
    void setOffset(size_t new_offset)
    {
        if (new_offset > vector.size())
        {
            // new_offset > size() >= count(), so the request is always positive.
            const size_t request_size = new_offset - count();
            reserveForNextSize(request_size);
        }
        pos = reinterpret_cast<Position>(vector.data() + new_offset);
    }

    /// Moves the write position forward by `n` bytes without writing them.
    void advance(size_t n) { setOffset(offset() + n); }

    /// Distance of the write position from the start of the container.
    size_t offset() const { return pos - reinterpret_cast<Position>(vector.data()); }

    /// Same value as `offset()`.
    size_t count() const { return offset(); }

    /// Resizes the container to `count()` elements, dropping the unused tail.
    ~VectorWriter()
    {
        vector.resize(count());
        pos = nullptr;
        end = nullptr;
    }

private:
    /// Bytes available between the write position and the end of the container.
    size_t remainingSize() const { return static_cast<size_t>(end - pos); }

    /// Resizes the container to `new_size` and re-derives the cached pointers,
    /// because resizing may move the underlying storage.
    void reserve(size_t new_size)
    {
        const size_t pos_offset = offset();
        vector.resize(new_size);
        pos = reinterpret_cast<Position>(vector.data() + pos_offset);
        end = reinterpret_cast<Position>(vector.data() + vector.size());
    }

    /// Guarantees room for `request_size` more bytes at the write position.
    void reserveForNextSize(size_t request_size = 1)
    {
        assert(request_size > 0);
        if (remainingSize() < request_size)
        {
            const size_t old_size = vector.size();
            // Grow to the larger of "just enough for this request" and 1.5x the
            // current size rounded up. Integer arithmetic keeps both operands
            // of std::max the same type (std::max cannot deduce a common type
            // from size_t and the double returned by std::ceil).
            const size_t required_size = old_size + request_size;
            const size_t grown_size = old_size + (old_size + 1) / 2;
            reserve(std::max(required_size, grown_size));
        }
    }

    VectorType & vector;

    Position pos = nullptr;
    Position end = nullptr;
};

/// Writes the single byte `x` through `writer`; the writer type only needs to
/// provide a `write(char)` member.
template <typename Writer>
inline void writeChar(char x, Writer & writer)
{
    writer.write(x);
}

template <typename VectorWriter>
inline void writeVarUInt(UInt64 x, VectorWriter & writer)
{
while (x >= 0x80)
{
writeChar(static_cast<UInt8>(x) | 0x80, writer);
x >>= 7;
}
writeChar(x, writer);
}
} // namespace DB
216 changes: 216 additions & 0 deletions dbms/src/Common/tests/gtest_simdjson.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
// Copyright 2023 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <gtest/gtest.h>
#include <simdjson.h>

namespace DB::tests
{
// Each input below is expected to be rejected by the DOM parser.
TEST(TestSIMDJson, error)
{
    const std::string rejected_inputs[] = {
        "",
        "[]]",
        "fsdfhsdjhfjsdhfj",
        "{}}",
        "[[], [[fdjfhdjf]]]",
    };

    simdjson::dom::parser parser;
    for (const auto & json_str : rejected_inputs)
    {
        auto res = parser.parse(json_str);
        ASSERT_TRUE(res.error());
    }
}

// Scalar documents: numbers, booleans, null and a string.
TEST(TestSIMDJson, literal)
{
    simdjson::dom::parser parser;

    // Numbers are read back through get_double(), integers included.
    struct NumberCase
    {
        const char * text;
        double expected;
    };
    const NumberCase number_cases[] = {
        {"0", 0},
        {"1", 1},
        {"-1", -1},
        {"1.111", 1.111},
        {"-1.111", -1.111},
    };
    for (const auto & number_case : number_cases)
    {
        std::string json_str{number_case.text};
        auto res = parser.parse(json_str);
        ASSERT_TRUE(res.is_number());
        auto actual = res.get_double();
        ASSERT_TRUE(!actual.error());
        ASSERT_EQ(actual.value_unsafe(), number_case.expected);
    }

    struct BoolCase
    {
        const char * text;
        bool expected;
    };
    const BoolCase bool_cases[] = {
        {"true", true},
        {"false", false},
    };
    for (const auto & bool_case : bool_cases)
    {
        std::string json_str{bool_case.text};
        auto res = parser.parse(json_str);
        ASSERT_TRUE(res.is_bool());
        auto actual = res.get_bool();
        ASSERT_TRUE(!actual.error());
        ASSERT_EQ(actual.value_unsafe(), bool_case.expected);
    }

    {
        std::string json_str{"null"};
        auto res = parser.parse(json_str);
        ASSERT_TRUE(res.is_null());
    }

    {
        std::string json_str{"\"a\""};
        auto res = parser.parse(json_str);
        ASSERT_TRUE(res.is_string());
        auto actual = res.get_string();
        ASSERT_TRUE(!actual.error());
        ASSERT_EQ(std::string(actual.value_unsafe()), "a");
    }
}

// Array documents: checks the element count, then a nested array.
TEST(TestSIMDJson, array)
{
    simdjson::dom::parser parser;

    struct ArrayCase
    {
        const char * text;
        size_t expected_size;
    };
    const ArrayCase array_cases[] = {
        {"[]", 0},
        {"[1, 2]", 2},
        {"[1,2]", 2},
    };
    for (const auto & array_case : array_cases)
    {
        std::string json_str{array_case.text};
        auto res = parser.parse(json_str);
        ASSERT_TRUE(res.is_array());
        auto array = res.get_array();
        ASSERT_TRUE(!array.error());
        const auto & actual = array.value_unsafe();
        ASSERT_EQ(actual.size(), array_case.expected_size);
    }

    // An array whose only element is itself an array.
    {
        std::string json_str{"[[]]"};
        auto res = parser.parse(json_str);
        ASSERT_TRUE(res.is_array());
        auto array = res.get_array();
        ASSERT_TRUE(!array.error());
        const auto & actual = array.value_unsafe();
        ASSERT_EQ(actual.size(), 1);
        ASSERT_TRUE(actual.at(0).is_array());
    }
}

// Object documents: the empty object first, then lookups of one string member.
TEST(TestSIMDJson, object)
{
    simdjson::dom::parser parser;

    {
        std::string json_str{"{}"};
        auto res = parser.parse(json_str);
        ASSERT_TRUE(res.is_object());
        auto obj = res.get_object();
        ASSERT_TRUE(!obj.error());
        const auto & actual = obj.value_unsafe();
        ASSERT_EQ(actual.size(), 0);
    }

    struct ObjectCase
    {
        const char * text;
        size_t expected_size;
        const char * key;
        const char * expected_value;
    };
    const ObjectCase object_cases[] = {
        {R"({"a":"b"})", 1, "a", "b"},
        {R"({"a" : "b"})", 1, "a", "b"},
        {R"({"a" : "b", "c":"d"})", 2, "c", "d"},
    };
    for (const auto & object_case : object_cases)
    {
        std::string json_str{object_case.text};
        auto res = parser.parse(json_str);
        ASSERT_TRUE(res.is_object());
        auto obj = res.get_object();
        ASSERT_TRUE(!obj.error());
        const auto & actual = obj.value_unsafe();
        ASSERT_EQ(actual.size(), object_case.expected_size);
        const auto & value = actual.at_key(object_case.key);
        ASSERT_TRUE(value.is_string());
        ASSERT_EQ(std::string(value.get_string().value_unsafe()), object_case.expected_value);
    }
}

} // namespace DB::tests
Loading

0 comments on commit d1b149e

Please sign in to comment.