Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: Improve the implementation of simd based packing #577

Merged
merged 1 commit into from
Dec 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/core/compact_object.cc
Original file line number Diff line number Diff line change
Expand Up @@ -674,7 +674,7 @@ void CompactObj::SetString(std::string_view str) {
}

tl.tmp_buf.resize(encode_len);
detail::ascii_pack_simd(str.data(), str.size(), tl.tmp_buf.data());
detail::ascii_pack_simd2(str.data(), str.size(), tl.tmp_buf.data());
encoded = string_view{reinterpret_cast<char*>(tl.tmp_buf.data()), encode_len};

if (encoded.size() <= kInlineLen) {
Expand Down
12 changes: 11 additions & 1 deletion src/core/compact_object_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ TEST_F(CompactObjectTest, AsciiUtil) {
}
string act_str(data3.size(), 'y');
std::vector<uint8_t> binvec(detail::binpacked_len(data3.size()));
detail::ascii_pack_simd(data3.data(), data3.size(), binvec.data());
detail::ascii_pack_simd2(data3.data(), data3.size(), binvec.data());
detail::ascii_unpack_simd(binvec.data(), data3.size(), act_str.data());

ASSERT_EQ(data3, act_str);
Expand Down Expand Up @@ -546,6 +546,16 @@ static void BM_PackSimd(benchmark::State& state) {
}
BENCHMARK(BM_PackSimd);

// Benchmark: repeatedly packs a 1024-character string of 'a' with
// detail::ascii_pack_simd2 into a fixed-size stack buffer.
static void BM_PackSimd2(benchmark::State& state) {
  const string input(1024, 'a');
  uint8_t packed[1024];

  const char* const src = input.data();
  const size_t src_len = input.size();

  while (state.KeepRunning()) {
    detail::ascii_pack_simd2(src, src_len, packed);
  }
}
BENCHMARK(BM_PackSimd2);

static void BM_UnpackNaive(benchmark::State& state) {
string val(1024, 'a');
uint8_t buf[1024];
Expand Down
38 changes: 38 additions & 0 deletions src/core/detail/bitpacking.cc
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,44 @@ void ascii_pack_simd(const char* ascii, size_t len, uint8_t* bin) {
ascii_pack(ascii, end - ascii, bin);
}

// Packs ascii text 8->7: every 8 input bytes are squeezed into 7 output bytes.
//
//   ascii - input characters. The SIMD path assumes every byte has its top bit clear,
//           because _mm_maddubs_epi16 treats the bytes of its second operand as signed.
//   len   - number of input bytes. The DCHECK below expects len > 0.
//   bin   - destination buffer.
//
// The main loop consumes 16 input bytes and produces 14 output bytes per iteration.
// Whatever input remains after the loop (at least 16 bytes when the loop ran at all)
// is delegated to ascii_pack.
void ascii_pack_simd2(const char* ascii, size_t len, uint8_t* bin) {
  const char* const end = ascii + len;

  // Skips the 8th byte (index 7) in the lower 8-byte part, so that the two 7-byte groups
  // become contiguous. The two -1 selectors zero the top two bytes of the result.
  const __m128i control = _mm_set_epi8(-1, -1, 14, 13, 12, 11, 10, 9, 8, 6, 5, 4, 3, 2, 1, 0);

  __m128i val, rpart, lpart;

  // I leave out 16 bytes in addition to the 16 that we load in the loop
  // because we store into bin full 16 bytes instead of 14. To prevent data
  // overwrite we finish the loop one iteration earlier.
  // The bound is expressed as a distance from `end` rather than as the pointer
  // `ascii + len - 32`, which would point before the input when len < 32.
  //
  // Based on the question I asked here: https://stackoverflow.com/q/74831843/2280111
  while (end - ascii >= 32) {
    val = _mm_loadu_si128(reinterpret_cast<const __m128i*>(ascii));

    /*
    Per 64-bit lane this computes the equivalent of:
    x = ((x & 0x7F007F007F007F00) >> 1) | (x & 0x007F007F007F007F);
    x = ((x & 0x3FFF00003FFF0000) >> 2) | (x & 0x00003FFF00003FFF);
    x = ((x & 0x0FFFFFFF00000000) >> 4) | (x & 0x000000000FFFFFFF);
    */
    // Step 1: each byte pair (lo, hi) -> lo * 1 + hi * 128, a 14-bit value per 16-bit lane.
    val = _mm_maddubs_epi16(_mm_set1_epi16(0x8001), val);
    // Step 2: each 16-bit pair (lo, hi) -> lo * 1 + hi * 2^14, a 28-bit value per 32-bit lane.
    val = _mm_madd_epi16(_mm_set1_epi32(0x40000001), val);

    // Step 3: merge the two 28-bit halves of each 64-bit lane into 56 contiguous bits.
    rpart = _mm_and_si128(val, _mm_set1_epi64x(0x000000000FFFFFFF));
    lpart = _mm_and_si128(val, _mm_set1_epi64x(0x0FFFFFFF00000000));
    val = _mm_or_si128(_mm_srli_epi64(lpart, 4), rpart);

    // Drop the empty top byte of the low lane; the 14 payload bytes end up first.
    val = _mm_shuffle_epi8(val, control);
    _mm_storeu_si128(reinterpret_cast<__m128i*>(bin), val);
    bin += 14;  // Only 14 of the 16 stored bytes are payload.
    ascii += 16;
  }

  // Holds for any len > 0: the loop always leaves at least 16 bytes unprocessed.
  DCHECK(ascii < end);
  ascii_pack(ascii, end - ascii, bin);
}

// Unpacks an 8->7 encoded blob back to ascii.
// Generally, we cannot unpack in place because the ascii (dest) buffer is 8/7 bigger than
// the source buffer.
Expand Down
2 changes: 2 additions & 0 deletions src/core/detail/bitpacking.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ void ascii_pack(const char* ascii, size_t len, uint8_t* bin);
void ascii_pack2(const char* ascii, size_t len, uint8_t* bin);

void ascii_pack_simd(const char* ascii, size_t len, uint8_t* bin);
// Alternative SIMD packer with the same signature as ascii_pack_simd. Its main loop
// stores 16 bytes into bin per 16 input bytes while advancing bin by only 14, and it
// hands the remaining input to ascii_pack.
void ascii_pack_simd2(const char* ascii, size_t len, uint8_t* bin);

bool compare_packed(const uint8_t* packed, const char* ascii, size_t ascii_len);

// maps ascii len to 7-bit packed length. Each 8 bytes are converted to 7 bytes.
Expand Down