Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Vector: optimize read performance #9687

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion dbms/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -102,12 +102,13 @@ check_then_add_sources_compile_flag (
TIFLASH_ENABLE_ARCH_HASWELL_SUPPORT
"${TIFLASH_COMPILER_ARCH_HASWELL_FLAG}"
src/Columns/ColumnString.cpp
src/Columns/ColumnsCommon.cpp
src/Columns/ColumnVector.cpp
src/Columns/ColumnDecimal.cpp
src/Columns/ColumnArray.cpp
src/Columns/ColumnNullable.cpp
src/Columns/ColumnFixedString.cpp
src/Columns/countBytesInFilter.cpp
src/Columns/ColumnUtil.cpp
src/DataTypes/DataTypeString.cpp
src/Interpreters/Join.cpp
src/IO/Compression/EncodingUtil.cpp
Expand Down
12 changes: 6 additions & 6 deletions dbms/src/AggregateFunctions/AggregateFunctionCount.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

#include <AggregateFunctions/IAggregateFunction.h>
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/countBytesInFilter.h>
#include <DataTypes/DataTypesNumber.h>
#include <IO/VarInt.h>
#include <IO/WriteHelpers.h>
Expand Down Expand Up @@ -112,7 +112,7 @@ class AggregateFunctionCount final
}

/// May be used for optimization.
void addDelta(AggregateDataPtr __restrict place, UInt64 x) const { data(place).count += x; }
static void addDelta(AggregateDataPtr __restrict place, UInt64 x) { data(place).count += x; }

const char * getHeaderFilePath() const override { return __FILE__; }
};
Expand All @@ -123,7 +123,7 @@ class AggregateFunctionCountNotNullUnary final
: public IAggregateFunctionDataHelper<AggregateFunctionCountData, AggregateFunctionCountNotNullUnary>
{
public:
AggregateFunctionCountNotNullUnary(const DataTypePtr & argument)
explicit AggregateFunctionCountNotNullUnary(const DataTypePtr & argument)
{
if (!argument->isNullable())
throw Exception(
Expand Down Expand Up @@ -202,7 +202,7 @@ class AggregateFunctionCountNotNullVariadic final
: public IAggregateFunctionDataHelper<AggregateFunctionCountData, AggregateFunctionCountNotNullVariadic>
{
public:
AggregateFunctionCountNotNullVariadic(const DataTypes & arguments)
explicit AggregateFunctionCountNotNullVariadic(const DataTypes & arguments)
{
number_of_arguments = arguments.size();

Expand All @@ -214,7 +214,7 @@ class AggregateFunctionCountNotNullVariadic final
if (number_of_arguments > MAX_ARGS)
throw Exception(
"Maximum number of arguments for aggregate function with Nullable types is "
+ toString(size_t(MAX_ARGS)),
+ toString(static_cast<size_t>(MAX_ARGS)),
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

for (size_t i = 0; i < number_of_arguments; ++i)
Expand Down Expand Up @@ -262,7 +262,7 @@ class AggregateFunctionCountNotNullVariadic final
MAX_ARGS = 8
};
size_t number_of_arguments = 0;
std::array<char, MAX_ARGS> is_nullable; /// Plain array is better than std::vector due to one indirection less.
std::array<char, MAX_ARGS> is_nullable{}; /// Plain array is better than std::vector due to one indirection less.
};

} // namespace DB
2 changes: 1 addition & 1 deletion dbms/src/AggregateFunctions/AggregateFunctionNull.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/countBytesInFilter.h>
#include <Common/typeid_cast.h>
#include <DataTypes/DataTypeArray.h>
#include <DataTypes/DataTypeNullable.h>
Expand Down
3 changes: 2 additions & 1 deletion dbms/src/Columns/ColumnAggregateFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
#include <AggregateFunctions/AggregateFunctionState.h>
#include <Columns/ColumnAggregateFunction.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/countBytesInFilter.h>
#include <Columns/filterColumn.h>
#include <Common/HashTable/Hash.h>
#include <Common/SipHash.h>
#include <Common/typeid_cast.h>
Expand Down
3 changes: 1 addition & 2 deletions dbms/src/Columns/ColumnArray.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@
#include <Columns/ColumnNullable.h>
#include <Columns/ColumnString.h>
#include <Columns/ColumnTuple.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/filterColumn.h>
#include <Common/Arena.h>
#include <Common/Exception.h>
#include <Common/HashTable/Hash.h>
Expand All @@ -30,7 +30,6 @@
#include <IO/WriteHelpers.h>
#include <string.h> // memcpy

#include <memory>

namespace DB
{
Expand Down
3 changes: 2 additions & 1 deletion dbms/src/Columns/ColumnConst.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
// limitations under the License.

#include <Columns/ColumnConst.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/countBytesInFilter.h>
#include <Columns/filterColumn.h>
#include <Common/HashTable/Hash.h>
#include <Common/typeid_cast.h>
#include <IO/WriteHelpers.h>
Expand Down
3 changes: 2 additions & 1 deletion dbms/src/Columns/ColumnDecimal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
// limitations under the License.

#include <Columns/ColumnDecimal.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/countBytesInFilter.h>
#include <Columns/filterColumn.h>
#include <Common/Arena.h>
#include <Common/Exception.h>
#include <Common/HashTable/Hash.h>
Expand Down
3 changes: 2 additions & 1 deletion dbms/src/Columns/ColumnFixedString.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
// limitations under the License.

#include <Columns/ColumnFixedString.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/countBytesInFilter.h>
#include <Columns/filterColumn.h>
#include <Common/Arena.h>
#include <Common/HashTable/Hash.h>
#include <Common/SipHash.h>
Expand Down
3 changes: 2 additions & 1 deletion dbms/src/Columns/ColumnFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
// limitations under the License.

#include <Columns/ColumnFunction.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/countBytesInFilter.h>
#include <Columns/filterColumn.h>
#include <Functions/IFunction.h>
#include <Interpreters/ExpressionActions.h>
#include <fmt/format.h>
Expand Down
2 changes: 1 addition & 1 deletion dbms/src/Columns/ColumnString.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
// limitations under the License.

#include <Columns/ColumnString.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/filterColumn.h>
#include <Common/HashTable/Hash.h>
#include <DataStreams/ColumnGathererStream.h>
#include <TiDB/Collation/CollatorUtils.h>
Expand Down
105 changes: 105 additions & 0 deletions dbms/src/Columns/ColumnUtil.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
// Copyright 2024 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <Columns/ColumnUtil.h>
#include <common/mem_utils_opt.h>

#if defined(__aarch64__) && defined(__ARM_NEON)
#include <arm_neon.h>
#elif defined(__SSE2__) || defined(__AVX2__) || defined(__AVX512F__) && defined(__AVX512BW__)
#include <immintrin.h>
#endif

#ifdef TIFLASH_ENABLE_AVX_SUPPORT
ASSERT_USE_AVX2_COMPILE_FLAG
#endif


namespace DB
{

/// Packs 64 consecutive bytes into a 64-bit mask: bit i of the result is set
/// iff bytes64[i] is non-zero (bit 0 corresponds to bytes64[0]).
///
/// Every branch below first builds `res` with the opposite polarity (bit i set
/// iff bytes64[i] == 0); the final `~res` flips it. The portable fallback at the
/// bottom is the reference definition of that behaviour.
///
/// @param bytes64 pointer to at least 64 readable bytes; the SIMD branches use
///                unaligned loads, so no particular alignment is required.
/// @return the non-zero-byte bitmask described above.
UInt64 ToBits64(const UInt8 * bytes64)
{
#if defined(__AVX512F__) && defined(__AVX512BW__)
// One 64-byte load; testn sets mask bit i when (byte & byte) == 0, i.e. the byte is zero.
const __m512i vbytes = _mm512_loadu_si512(reinterpret_cast<const void *>(bytes64));
UInt64 res = _mm512_testn_epi8_mask(vbytes, vbytes);
#elif defined(__AVX2__)
// Two 32-byte halves compared against an all-zero block.
// NOTE(review): get_block32_cmp_eq_mask is declared in mem_utils_opt.h, not visible here;
// presumably it returns a 32-bit "bytes equal" mask and BLOCK32_SIZE is 32 -- confirm.
const auto check_block = _mm256_setzero_si256();
uint64_t mask0 = mem_utils::details::get_block32_cmp_eq_mask(bytes64, check_block);
uint64_t mask1
= mem_utils::details::get_block32_cmp_eq_mask(bytes64 + mem_utils::details::BLOCK32_SIZE, check_block);
// The masks are held in 64-bit variables so the upper half can be shifted into place.
auto res = mask0 | (mask1 << mem_utils::details::BLOCK32_SIZE);
#elif defined(__SSE2__)
// Four 16-byte chunks: compare each with zero, take the 16-bit movemask and
// place it at bit offsets 0, 16, 32 and 48.
const auto zero16 = _mm_setzero_si128();
UInt64 res = static_cast<UInt64>(_mm_movemask_epi8(
_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64)), zero16)))
| (static_cast<UInt64>(_mm_movemask_epi8(
_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 16)), zero16)))
<< 16)
| (static_cast<UInt64>(_mm_movemask_epi8(
_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 32)), zero16)))
<< 32)
| (static_cast<UInt64>(_mm_movemask_epi8(
_mm_cmpeq_epi8(_mm_loadu_si128(reinterpret_cast<const __m128i *>(bytes64 + 48)), zero16)))
<< 48);
#elif defined(__aarch64__) && defined(__ARM_NEON)
// NEON has no movemask instruction, so the mask is assembled by hand.
// Each lane gets the weight of its bit position within its group of 8 lanes.
const uint8x16_t bitmask
= {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80, 0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80};
const auto * src = reinterpret_cast<const unsigned char *>(bytes64);
// vceqzq_u8 produces 0xFF in every lane whose input byte is zero, 0x00 otherwise.
const uint8x16_t p0 = vceqzq_u8(vld1q_u8(src));
const uint8x16_t p1 = vceqzq_u8(vld1q_u8(src + 16));
const uint8x16_t p2 = vceqzq_u8(vld1q_u8(src + 32));
const uint8x16_t p3 = vceqzq_u8(vld1q_u8(src + 48));
// Keep only the positional weight in the lanes that matched.
uint8x16_t t0 = vandq_u8(p0, bitmask);
uint8x16_t t1 = vandq_u8(p1, bitmask);
uint8x16_t t2 = vandq_u8(p2, bitmask);
uint8x16_t t3 = vandq_u8(p3, bitmask);
// Three rounds of pairwise addition fold every group of 8 lanes into one byte.
// The weights within a group are distinct powers of two, so the additions act as ORs.
uint8x16_t sum0 = vpaddq_u8(t0, t1);
uint8x16_t sum1 = vpaddq_u8(t2, t3);
sum0 = vpaddq_u8(sum0, sum1);
sum0 = vpaddq_u8(sum0, sum0);
// After the last round the low 8 bytes hold the 64 mask bits in input order.
UInt64 res = vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0);
#else
// Portable fallback: set bit i when byte i is zero.
UInt64 res = 0;
for (size_t i = 0; i < 64; ++i)
res |= static_cast<UInt64>(0 == bytes64[i]) << i;
#endif
// `res` marks the zero bytes; callers want the non-zero ones.
return ~res;
}

/// Determines whether `mask` selects exactly a prefix of a 64-row block.
///
/// Bit 0 of `mask` corresponds to the first row of the block, so a mask of the
/// form [0]*[1]+ (only the low-order bits set, with no gaps) means "keep the
/// first N rows and drop the rest".
///
/// @param mask filter bitmask for one 64-row block.
/// @return the number of low-order 1 bits (1..64) when the mask has the form
///         [0]*[1]+; 0 when the mask is zero; the special value 0xFF when the
///         set bits do not form a single run starting at bit 0.
///
/// Note: callers are expected to pass a non-zero mask. A zero mask is handled
/// explicitly anyway, because std::countl_zero(0) is 64 and shifting a 64-bit
/// value by 64 bits is undefined behavior.
UInt8 prefixToCopy(UInt64 mask)
{
    static constexpr UInt64 all_match = 0xFFFFFFFFFFFFFFFFULL;
    if (mask == 0)
        return 0;
    if (mask == all_match)
        return 64;
    /// std::countl_zero counts from the most significant bit of mask, which corresponds to the tail of the
    /// original filter. If only the tail of the original filter is zero, we can copy the prefix directly.
    /// The length of the zero tail is `leading_zeroes`, so the length of the prefix to copy is
    /// 64 - `leading_zeroes`.
    const UInt64 leading_zeroes = std::countl_zero(mask);
    /// mask is non-zero here, so leading_zeroes is in [0, 63] and the shift is well defined.
    /// `all_match >> leading_zeroes` is the value with exactly the low (64 - leading_zeroes) bits set.
    if (mask == (all_match >> leading_zeroes))
        return 64 - leading_zeroes;
    return 0xFF;
}

/// Counterpart of prefixToCopy for masks whose set bits sit at the high end.
///
/// The mask is inverted and handed to prefixToCopy, which measures the run of
/// low-order bits that are clear in `mask`. A result below 64 is that run's
/// length, so the suffix length is the remainder of the 64-row block. A result
/// of 64 or above (including the 0xFF "no match" marker) is passed through
/// unchanged.
///
/// Note: an all-ones mask inverts to zero, which prefixToCopy documents as
/// outside its contract; callers should handle that case before calling.
UInt8 suffixToCopy(UInt64 mask)
{
    const UInt8 cleared_prefix = prefixToCopy(~mask);
    if (cleared_prefix >= 64)
        return cleared_prefix;
    return 64 - cleared_prefix;
}

} // namespace DB
33 changes: 33 additions & 0 deletions dbms/src/Columns/ColumnUtil.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// Copyright 2024 PingCAP, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <common/types.h>

namespace DB
{

UInt64 ToBits64(const UInt8 * bytes64);

constexpr size_t FILTER_SIMD_BYTES = 64;

/// If mask is a number of this kind: [0]*[1]+ function returns the length of the cluster of 1s.
/// Otherwise it returns the special value: 0xFF.
/// Note: mask must be non-zero.
UInt8 prefixToCopy(UInt64 mask);

UInt8 suffixToCopy(UInt64 mask);

} // namespace DB
3 changes: 2 additions & 1 deletion dbms/src/Columns/ColumnVector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
// limitations under the License.

#include <Columns/ColumnVector.h>
#include <Columns/ColumnsCommon.h>
#include <Columns/countBytesInFilter.h>
#include <Columns/filterColumn.h>
#include <Common/Arena.h>
#include <Common/Exception.h>
#include <Common/HashTable/Hash.h>
Expand Down
2 changes: 1 addition & 1 deletion dbms/src/Columns/IColumnDummy.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@

#pragma once

#include <Columns/ColumnsCommon.h>
#include <Columns/IColumn.h>
#include <Columns/countBytesInFilter.h>
#include <Common/Arena.h>


Expand Down
2 changes: 1 addition & 1 deletion dbms/src/Columns/VirtualColumnUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.

#include <Columns/ColumnsCommon.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/VirtualColumnUtils.h>
#include <Columns/filterColumn.h>
#include <Common/typeid_cast.h>
#include <Core/NamesAndTypes.h>
#include <Interpreters/Context.h>
Expand Down
Loading