-
Notifications
You must be signed in to change notification settings - Fork 411
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
22 changed files
with
2,544 additions
and
127 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -187,3 +187,5 @@ endif () | |
add_subdirectory(magic_enum) | ||
|
||
add_subdirectory(aws-cmake) | ||
|
||
add_subdirectory(simdjson) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,124 @@ | ||
// Copyright 2023 PingCAP, Inc. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
#pragma once | ||
|
||
#include <common/likely.h>
#include <common/types.h>

#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstring>
|
||
namespace DB | ||
{ | ||
template <typename VectorType> | ||
class VectorWriter | ||
{ | ||
public: | ||
using Position = char *; | ||
|
||
explicit VectorWriter(VectorType & vector_, size_t initial_size = 16) | ||
: vector(vector_) | ||
{ | ||
if (vector.size() < initial_size) | ||
vector.resize(initial_size); | ||
pos = reinterpret_cast<Position>(vector.data()); | ||
end = reinterpret_cast<Position>(vector.data() + vector.size()); | ||
} | ||
|
||
inline void write(char x) | ||
{ | ||
reserveForNextSize(1); | ||
*pos = x; | ||
++pos; | ||
} | ||
|
||
void write(const char * from, size_t n) | ||
{ | ||
if (unlikely(n == 0)) | ||
return; | ||
reserveForNextSize(n); | ||
std::memcpy(pos, from, n); | ||
pos += n; | ||
} | ||
|
||
void setOffset(size_t new_offset) | ||
{ | ||
if (new_offset > vector.size()) | ||
{ | ||
size_t request_size = (new_offset - count()); | ||
reserveForNextSize(request_size); | ||
} | ||
pos = reinterpret_cast<Position>(vector.data() + new_offset); | ||
} | ||
|
||
void advance(size_t n) { setOffset(offset() + n); } | ||
|
||
size_t offset() { return pos - reinterpret_cast<Position>(vector.data()); } | ||
|
||
size_t count() { return offset(); } | ||
|
||
~VectorWriter() | ||
{ | ||
vector.resize(count()); | ||
pos = nullptr; | ||
end = nullptr; | ||
} | ||
|
||
private: | ||
size_t remainingSize() const { return static_cast<size_t>(end - pos); } | ||
|
||
void reserve(size_t new_size) | ||
{ | ||
size_t pos_offset = offset(); | ||
vector.resize(new_size); | ||
pos = reinterpret_cast<Position>(vector.data() + pos_offset); | ||
end = reinterpret_cast<Position>(vector.data() + vector.size()); | ||
} | ||
|
||
void reserveForNextSize(size_t request_size = 1) | ||
{ | ||
assert(request_size > 0); | ||
if (remainingSize() < request_size) | ||
{ | ||
size_t old_size = vector.size(); | ||
size_t new_size = std::max(old_size + request_size, std::ceil(old_size * 1.5)); | ||
reserve(new_size); | ||
} | ||
} | ||
|
||
private: | ||
static_assert(sizeof(typename VectorType::value_type) == sizeof(char)); | ||
VectorType & vector; | ||
|
||
Position pos = nullptr; | ||
Position end = nullptr; | ||
}; | ||
|
||
/// Appends the single byte `x` to `writer` by calling `writer.write(x)`.
///
/// The template parameter is named `Writer` so that it does not hide the
/// `VectorWriter` class template declared in this namespace.
template <typename Writer>
inline void writeChar(char x, Writer & writer)
{
    writer.write(x);
}
|
||
template <typename VectorWriter> | ||
inline void writeVarUInt(UInt64 x, VectorWriter & writer) | ||
{ | ||
while (x >= 0x80) | ||
{ | ||
writeChar(static_cast<UInt8>(x) | 0x80, writer); | ||
x >>= 7; | ||
} | ||
writeChar(x, writer); | ||
} | ||
} // namespace DB |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,216 @@ | ||
// Copyright 2023 PingCAP, Inc. | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
#include <gtest/gtest.h> | ||
#include <simdjson.h> | ||
|
||
namespace DB::tests | ||
{ | ||
TEST(TestSIMDJson, error) | ||
{ | ||
simdjson::dom::parser parser; | ||
{ | ||
std::string json_str{}; | ||
auto res = parser.parse(json_str); | ||
ASSERT_TRUE(res.error()); | ||
} | ||
{ | ||
std::string json_str{"[]]"}; | ||
auto res = parser.parse(json_str); | ||
ASSERT_TRUE(res.error()); | ||
} | ||
{ | ||
std::string json_str{"fsdfhsdjhfjsdhfj"}; | ||
auto res = parser.parse(json_str); | ||
ASSERT_TRUE(res.error()); | ||
} | ||
{ | ||
std::string json_str{"{}}"}; | ||
auto res = parser.parse(json_str); | ||
ASSERT_TRUE(res.error()); | ||
} | ||
{ | ||
std::string json_str{"[[], [[fdjfhdjf]]]"}; | ||
auto res = parser.parse(json_str); | ||
ASSERT_TRUE(res.error()); | ||
} | ||
} | ||
|
||
// Scalar documents: numbers, booleans, null and a string.
TEST(TestSIMDJson, literal)
{
    simdjson::dom::parser parser;

    // Numbers: each text must be reported as a number and read back as the given double.
    struct NumberCase
    {
        std::string text;
        double expected;
    };
    const NumberCase number_cases[] = {
        {"0", 0.0},
        {"1", 1.0},
        {"-1", -1.0},
        {"1.111", 1.111},
        {"-1.111", -1.111},
    };
    for (const auto & number_case : number_cases)
    {
        auto parsed = parser.parse(number_case.text);
        ASSERT_TRUE(parsed.is_number());
        auto as_double = parsed.get_double();
        ASSERT_FALSE(as_double.error());
        ASSERT_EQ(as_double.value_unsafe(), number_case.expected);
    }

    // Booleans.
    struct BoolCase
    {
        std::string text;
        bool expected;
    };
    const BoolCase bool_cases[] = {
        {"true", true},
        {"false", false},
    };
    for (const auto & bool_case : bool_cases)
    {
        auto parsed = parser.parse(bool_case.text);
        ASSERT_TRUE(parsed.is_bool());
        auto as_bool = parsed.get_bool();
        ASSERT_FALSE(as_bool.error());
        ASSERT_EQ(as_bool.value_unsafe(), bool_case.expected);
    }

    // null carries no payload, so only the type is checked.
    {
        const std::string null_text{"null"};
        auto parsed = parser.parse(null_text);
        ASSERT_TRUE(parsed.is_null());
    }

    // A quoted string must come back without its quotes.
    {
        const std::string quoted_text{"\"a\""};
        auto parsed = parser.parse(quoted_text);
        ASSERT_TRUE(parsed.is_string());
        auto as_string = parsed.get_string();
        ASSERT_FALSE(as_string.error());
        ASSERT_EQ(std::string(as_string.value_unsafe()), "a");
    }
}
|
||
// Array documents: element counts, plus one nested array.
TEST(TestSIMDJson, array)
{
    simdjson::dom::parser parser;

    // Flat arrays: only the number of elements is checked.
    struct SizeCase
    {
        std::string text;
        size_t expected_size;
    };
    const SizeCase size_cases[] = {
        {"[]", 0},
        {"[1, 2]", 2},
        {"[1,2]", 2},
    };
    for (const auto & size_case : size_cases)
    {
        auto parsed = parser.parse(size_case.text);
        ASSERT_TRUE(parsed.is_array());
        auto as_array = parsed.get_array();
        ASSERT_FALSE(as_array.error());
        const auto & elements = as_array.value_unsafe();
        ASSERT_EQ(elements.size(), size_case.expected_size);
    }

    // Nested array: the single element must itself be an array.
    {
        const std::string nested_text{"[[]]"};
        auto parsed = parser.parse(nested_text);
        ASSERT_TRUE(parsed.is_array());
        auto as_array = parsed.get_array();
        ASSERT_FALSE(as_array.error());
        const auto & elements = as_array.value_unsafe();
        ASSERT_EQ(elements.size(), 1);
        ASSERT_TRUE(elements.at(0).is_array());
    }
}
|
||
// Object documents: member counts and lookup of a string member by key.
TEST(TestSIMDJson, object)
{
    simdjson::dom::parser parser;

    // The empty object has no members.
    {
        const std::string empty_text{"{}"};
        auto parsed = parser.parse(empty_text);
        ASSERT_TRUE(parsed.is_object());
        auto as_object = parsed.get_object();
        ASSERT_FALSE(as_object.error());
        const auto & members = as_object.value_unsafe();
        ASSERT_EQ(members.size(), 0);
    }

    // Non-empty objects: check the member count, then the string stored under one key.
    struct LookupCase
    {
        std::string text;
        size_t expected_size;
        std::string key;
        std::string expected_value;
    };
    const LookupCase lookup_cases[] = {
        {R"({"a":"b"})", 1, "a", "b"},
        {R"({"a" : "b"})", 1, "a", "b"},
        {R"({"a" : "b", "c":"d"})", 2, "c", "d"},
    };
    for (const auto & lookup_case : lookup_cases)
    {
        auto parsed = parser.parse(lookup_case.text);
        ASSERT_TRUE(parsed.is_object());
        auto as_object = parsed.get_object();
        ASSERT_FALSE(as_object.error());
        const auto & members = as_object.value_unsafe();
        ASSERT_EQ(members.size(), lookup_case.expected_size);
        const auto & value = members.at_key(lookup_case.key);
        ASSERT_TRUE(value.is_string());
        ASSERT_EQ(std::string(value.get_string().value_unsafe()), lookup_case.expected_value);
    }
}
|
||
} // namespace DB::tests |
Oops, something went wrong.