chfast · chfast · Dec 12, 2022 · Nov 30, 2022 · Dec 11, 2022 · Nov 30, 2022
diff --git a/include/intx/intx.hpp b/include/intx/intx.hpp
@@ -1832,6 +1832,11 @@ inline constexpr div_result<uint<N>> sdivrem(const uint<N>& u, const uint<N>& v)
     return {q_is_neg ? -res.quot : res.quot, u_is_neg ? -res.rem : res.rem};
 }
 
+inline constexpr uint256 bswap(const uint256& x) noexcept  // Non-template overload for uint256.
+{
+    return {bswap(x[3]), bswap(x[2]), bswap(x[1]), bswap(x[0])};  // Reverse elements, bswap each.
+}
+
 template <unsigned N>
 inline constexpr uint<N> bswap(const uint<N>& x) noexcept
 {
@@ -2052,10 +2057,13 @@ namespace unsafe
 template <typename IntT>
 inline IntT load(const uint8_t* src) noexcept
 {
-    IntT x;
-    std::memcpy(&x, src, sizeof(x));
-    x = to_big_endian(x);
-    return x;
+    // Copy the source bytes into a local buffer that is aligned for IntT.
+    // TODO: memcpy() directly into an IntT object triggers this optimization bug in GCC:
+    //   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107837
+    alignas(IntT) std::byte aligned_storage[sizeof(IntT)];
+    std::memcpy(&aligned_storage, src, sizeof(IntT));
+    // TODO(C++23): Use std::start_lifetime_as<IntT>() instead of the reinterpret_cast below.
+    return to_big_endian(*reinterpret_cast<const IntT*>(&aligned_storage));
 }
 
 /// Stores an integer value at the provided pointer in big-endian order. The user must make sure
@@ -2066,6 +2074,25 @@ inline void store(uint8_t* dst, const T& x) noexcept
     const auto d = to_big_endian(x);
     std::memcpy(dst, &d, sizeof(d));
 }
+
+/// Non-template overload for uint256: stores x at dst in big-endian order.
+inline void store(uint8_t* dst, const uint256& x) noexcept
+{
+    // Keep the byte-swapped words in primitive temporaries. This helps with memory aliasing
+    // and works around GCC bug https://gcc.gnu.org/bugzilla/show_bug.cgi?id=107837
+    // TODO: Use std::byte instead of uint8_t.
+    const auto v0 = to_big_endian(x[0]);
+    const auto v1 = to_big_endian(x[1]);
+    const auto v2 = to_big_endian(x[2]);
+    const auto v3 = to_big_endian(x[3]);
+
+    // Write the words in reverse (big-endian) order so that write addresses are ascending.
+    std::memcpy(dst, &v3, sizeof(v3));
+    std::memcpy(dst + 8, &v2, sizeof(v2));
+    std::memcpy(dst + 16, &v1, sizeof(v1));
+    std::memcpy(dst + 24, &v0, sizeof(v0));  // NOTE(review): offsets assume 8-byte words.
+}
+
 }  // namespace unsafe
 
 }  // namespace be

diff --git a/test/benchmarks/benchmarks.cpp b/test/benchmarks/benchmarks.cpp
@@ -562,4 +562,35 @@ BENCHMARK_TEMPLATE(to_string, uint128);
 BENCHMARK_TEMPLATE(to_string, uint256);
 BENCHMARK_TEMPLATE(to_string, uint512);
 
+
+template <typename Int>  // Benchmark helper, marked noinline.
+[[gnu::noinline]] auto load_be(const uint8_t* data) noexcept
+{
+    return intx::be::unsafe::load<Int>(data);  // Forwards to the library's unsafe BE load.
+}
+
+template <typename Int>  // Benchmark helper, marked noinline.
+[[gnu::noinline]] auto store_be(uint8_t* data, const Int& v) noexcept
+{
+    intx::be::unsafe::store(data, v);  // Forwards to the library's unsafe BE store.
+}
+
+template <typename Int>
+static void load_store_be(benchmark::State& state)  // Benchmarks load_be followed by store_be.
+{
+    uint8_t load_buffer[sizeof(Int) + 7]{};  // Zero-filled, 7 spare bytes for the offset below.
+    const auto unaligned_load_ptr = load_buffer + 7;  // NOTE(review): array base alignment unknown.
+    uint8_t store_buffer[sizeof(Int) + 1]{};  // Zero-filled, 1 spare byte for the offset below.
+    const auto unaligned_store_ptr = store_buffer + 1;  // NOTE(review): same caveat as above.
+
+    for ([[maybe_unused]] auto _ : state)
+    {
+        auto v = load_be<Int>(unaligned_load_ptr);
+        store_be(unaligned_store_ptr, v);
+    }
+}
+BENCHMARK_TEMPLATE(load_store_be, uint128);
+BENCHMARK_TEMPLATE(load_store_be, uint256);
+BENCHMARK_TEMPLATE(load_store_be, uint512);
+
 BENCHMARK_MAIN();
diff --git a/test/fuzzer/fuzz_intx.cpp b/test/fuzzer/fuzz_intx.cpp
@@ -17,6 +17,7 @@ enum class op : uint8_t
     add = 0x04,
     sub = 0x05,
     sdivrem = 0x06,
+    cmp = 0x07,
 };
 
 template <typename T>
@@ -33,11 +34,9 @@ inline void test_op(const uint8_t* data, size_t data_size) noexcept
     if (data_size != 2 * arg_size + 1)
         return;
 
-    T a, b;
-    std::memcpy(&a, &data[1], arg_size);
-    std::memcpy(&b, &data[1 + arg_size], arg_size);
-    a = bswap(a);  // Bswap for BE - easier to extract the test from corpus.
-    b = bswap(b);
+    // Load both operands. Big-endian makes extracting tests from the corpus easier.
+    const auto a = be::unsafe::load<T>(&data[1]);
+    const auto b = be::unsafe::load<T>(&data[1 + arg_size]);
 
     switch (static_cast<op>(data[0]))
     {
@@ -94,6 +93,19 @@ inline void test_op(const uint8_t* data, size_t data_size) noexcept
         expect_eq(s, a + (~b + 1));
         break;
     }
+    case op::cmp:
+    {
+        auto aa = to_big_endian(a);
+        auto bb = to_big_endian(b);
+        auto m = std::memcmp(&aa, &bb, sizeof(aa));
+        expect_eq(a < b, m < 0);
+        expect_eq(a <= b, m <= 0);
+        expect_eq(a > b, m > 0);
+        expect_eq(a >= b, m >= 0);
+        expect_eq(a == b, m == 0);
+        expect_eq(a != b, m != 0);
+        break;
+    }
 
     default:
         break;
@@ -106,7 +118,10 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t data_size) noe
     test_op<intx::uint<2048>>(data, data_size);
     test_op<intx::uint<1024>>(data, data_size);
     test_op<intx::uint<512>>(data, data_size);
+    test_op<intx::uint<384>>(data, data_size);
+    test_op<intx::uint<320>>(data, data_size);
     test_op<intx::uint<256>>(data, data_size);
+    test_op<intx::uint<192>>(data, data_size);
     test_op<intx::uint<128>>(data, data_size);
     return 0;
 }
diff --git a/test/unittests/test_intx.cpp b/test/unittests/test_intx.cpp
@@ -154,7 +154,7 @@ TYPED_TEST(uint_test, endianness)
 
 TYPED_TEST(uint_test, be_zext)
 {
-    uint8_t data[] = {0x01, 0x02, 0x03};
+    const uint8_t data[] = {0x01, 0x02, 0x03};  // 3 input bytes, expected to load as 0x010203.
     const auto x = be::load<TypeParam>(data);
     EXPECT_EQ(x, 0x010203);
 }
@@ -164,9 +164,9 @@ TYPED_TEST(uint_test, be_load)
     constexpr auto size = sizeof(TypeParam);
     uint8_t data[size]{};
     data[0] = 0x80;
-    data[size - 1] = 1;
+    data[size - 1] = 3;
     const auto x = be::load<TypeParam>(data);
-    EXPECT_EQ(x, (TypeParam{1} << (TypeParam::num_bits - 1)) | 1);
+    EXPECT_EQ(x, (TypeParam{1} << (TypeParam::num_bits - 1)) | 3);
 }
 
 TYPED_TEST(uint_test, be_store)