Skip to content

Commit

Permalink
Merge pull request #281 from chfast/load_store
Browse files Browse the repository at this point in the history
Optimize BE load and store of uint256
  • Loading branch information
chfast authored Dec 12, 2022
2 parents 15e3a02 + cf96de4 commit 1a1911e
Show file tree
Hide file tree
Showing 4 changed files with 85 additions and 12 deletions.
35 changes: 31 additions & 4 deletions include/intx/intx.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1832,6 +1832,11 @@ inline constexpr div_result<uint<N>> sdivrem(const uint<N>& u, const uint<N>& v)
return {q_is_neg ? -res.quot : res.quot, u_is_neg ? -res.rem : res.rem};
}

/// Byte-swaps a uint256.
///
/// The result is assembled from the four words of @p x taken in reverse index order
/// (x[3] first, x[0] last), with the word-level bswap() applied to each of them.
inline constexpr uint256 bswap(const uint256& x) noexcept
{
    const auto w0 = bswap(x[3]);
    const auto w1 = bswap(x[2]);
    const auto w2 = bswap(x[1]);
    const auto w3 = bswap(x[0]);
    return {w0, w1, w2, w3};
}

template <unsigned N>
inline constexpr uint<N> bswap(const uint<N>& x) noexcept
{
Expand Down Expand Up @@ -2052,10 +2057,13 @@ namespace unsafe
/// Loads an IntT from the sizeof(IntT) bytes at src and returns it after passing it through
/// to_big_endian().
///
/// NOTE(review): this listing appears to interleave two versions of the body. The enclosing diff
/// hunk is marked "-2052,10 +2057,13", which matches four removed statements followed by seven
/// added lines. Confirm which body is intended before building from this text.
template <typename IntT>
inline IntT load(const uint8_t* src) noexcept
{
// First body: copy the source bytes directly into an IntT, then convert it.
IntT x;
std::memcpy(&x, src, sizeof(x));
x = to_big_endian(x);
return x;
// NOTE(review): as written, nothing below this point is reachable because of the `return x;`
// above. The statements that follow look like the replacement body: they copy the source bytes
// into suitably aligned byte storage first and read the IntT out of that storage.
// Align bytes.
// TODO: Using memcpy() directly triggers this optimization bug in GCC:
// https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107837
alignas(IntT) std::byte aligned_storage[sizeof(IntT)];
std::memcpy(&aligned_storage, src, sizeof(IntT));
// TODO(C++23): Use std::start_lifetime_as<uint256>().
// Reinterprets the aligned byte storage as an IntT and converts the value read from it.
return to_big_endian(*reinterpret_cast<const IntT*>(&aligned_storage));
}

/// Stores an integer value at the provided pointer in big-endian order. The user must make sure
Expand All @@ -2066,6 +2074,25 @@ inline void store(uint8_t* dst, const T& x) noexcept
const auto d = to_big_endian(x);
std::memcpy(dst, &d, sizeof(d));
}

/// Overload of store() for uint256.
///
/// Converts each of the four words of @p x with to_big_endian() and copies them to @p dst at
/// byte offsets 0, 8, 16 and 24, starting with x[3] and ending with x[0].
inline void store(uint8_t* dst, const uint256& x) noexcept
{
    // Every converted word lives in its own primitive temporary. This helps with memory aliasing
    // and with GCC bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107837
    // TODO: Use std::byte instead of uint8_t.
    const auto word_at_0 = to_big_endian(x[3]);
    const auto word_at_8 = to_big_endian(x[2]);
    const auto word_at_16 = to_big_endian(x[1]);
    const auto word_at_24 = to_big_endian(x[0]);

    // Words go out in reverse index order (big-endian); destination addresses are ascending.
    std::memcpy(&dst[0], &word_at_0, sizeof(word_at_0));
    std::memcpy(&dst[8], &word_at_8, sizeof(word_at_8));
    std::memcpy(&dst[16], &word_at_16, sizeof(word_at_16));
    std::memcpy(&dst[24], &word_at_24, sizeof(word_at_24));
}

} // namespace unsafe

} // namespace be
Expand Down
31 changes: 31 additions & 0 deletions test/benchmarks/benchmarks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -562,4 +562,35 @@ BENCHMARK_TEMPLATE(to_string, uint128);
BENCHMARK_TEMPLATE(to_string, uint256);
BENCHMARK_TEMPLATE(to_string, uint512);


/// Benchmark helper: forwards to intx::be::unsafe::load<Int>() and returns its result.
/// Marked noinline so the call is kept as a separate function in the benchmark loop.
template <typename Int>
[[gnu::noinline]] auto load_be(const uint8_t* data) noexcept
{
    auto loaded = intx::be::unsafe::load<Int>(data);
    return loaded;
}

template <typename Int>
[[gnu::noinline]] auto store_be(uint8_t* data, const Int& v) noexcept
{
intx::be::unsafe::store(data, v);
}

/// Benchmarks one load_be() followed by one store_be() per iteration for the integer type Int.
///
/// Both buffers are zero-initialized and slightly larger than sizeof(Int). The pointers handed
/// to the helpers are offset from the start of each buffer (by 7 and 1 bytes respectively),
/// with the intent of exercising unaligned addresses.
template <typename Int>
static void load_store_be(benchmark::State& state)
{
    constexpr auto load_offset = 7;
    constexpr auto store_offset = 1;

    uint8_t load_buffer[sizeof(Int) + load_offset] = {};
    uint8_t store_buffer[sizeof(Int) + store_offset] = {};

    const auto src = &load_buffer[load_offset];
    const auto dst = &store_buffer[store_offset];

    for ([[maybe_unused]] auto _ : state)
    {
        const auto value = load_be<Int>(src);
        store_be(dst, value);
    }
}
// Register the load_store_be benchmark once for each of uint128, uint256 and uint512.
BENCHMARK_TEMPLATE(load_store_be, uint128);
BENCHMARK_TEMPLATE(load_store_be, uint256);
BENCHMARK_TEMPLATE(load_store_be, uint512);

// Google Benchmark macro that provides the program's main().
BENCHMARK_MAIN();
25 changes: 20 additions & 5 deletions test/fuzzer/fuzz_intx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ enum class op : uint8_t
add = 0x04,
sub = 0x05,
sdivrem = 0x06,
cmp = 0x07,
};

template <typename T>
Expand All @@ -33,11 +34,9 @@ inline void test_op(const uint8_t* data, size_t data_size) noexcept
if (data_size != 2 * arg_size + 1)
return;

T a, b;
std::memcpy(&a, &data[1], arg_size);
std::memcpy(&b, &data[1 + arg_size], arg_size);
a = bswap(a); // Bswap for BE - easier to extract the test from corpus.
b = bswap(b);
// Load 2 values. BE for easier extracting tests from corpus.
const auto a = be::unsafe::load<T>(&data[1]);
const auto b = be::unsafe::load<T>(&data[1 + arg_size]);

switch (static_cast<op>(data[0]))
{
Expand Down Expand Up @@ -94,6 +93,19 @@ inline void test_op(const uint8_t* data, size_t data_size) noexcept
expect_eq(s, a + (~b + 1));
break;
}
case op::cmp:
{
auto aa = to_big_endian(a);
auto bb = to_big_endian(b);
auto m = std::memcmp(&aa, &bb, sizeof(aa));
expect_eq(a < b, m < 0);
expect_eq(a <= b, m <= 0);
expect_eq(a > b, m > 0);
expect_eq(a >= b, m >= 0);
expect_eq(a == b, m == 0);
expect_eq(a != b, m != 0);
break;
}

default:
break;
Expand All @@ -106,7 +118,10 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t data_size) noe
test_op<intx::uint<2048>>(data, data_size);
test_op<intx::uint<1024>>(data, data_size);
test_op<intx::uint<512>>(data, data_size);
test_op<intx::uint<384>>(data, data_size);
test_op<intx::uint<320>>(data, data_size);
test_op<intx::uint<256>>(data, data_size);
test_op<intx::uint<192>>(data, data_size);
test_op<intx::uint<128>>(data, data_size);
return 0;
}
6 changes: 3 additions & 3 deletions test/unittests/test_intx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ TYPED_TEST(uint_test, endianness)

// Checks be::load() on an input shorter than the integer type: three big-endian bytes
// 0x01 0x02 0x03 must load as the value 0x010203 for every tested type.
TYPED_TEST(uint_test, be_zext)
{
    // Declared exactly once. The listing carried both a non-const and a const declaration of
    // `data` in the same scope, which is a redefinition error. The const form is kept because
    // the test only reads the buffer.
    const uint8_t data[] = {0x01, 0x02, 0x03};
    const auto x = be::load<TypeParam>(data);
    EXPECT_EQ(x, 0x010203);
}
Expand All @@ -164,9 +164,9 @@ TYPED_TEST(uint_test, be_load)
constexpr auto size = sizeof(TypeParam);
uint8_t data[size]{};
data[0] = 0x80;
data[size - 1] = 1;
data[size - 1] = 3;
const auto x = be::load<TypeParam>(data);
EXPECT_EQ(x, (TypeParam{1} << (TypeParam::num_bits - 1)) | 1);
EXPECT_EQ(x, (TypeParam{1} << (TypeParam::num_bits - 1)) | 3);
}

TYPED_TEST(uint_test, be_store)
Expand Down

0 comments on commit 1a1911e

Please sign in to comment.