diff --git a/include/intx/int128.hpp b/include/intx/int128.hpp index 16c8e47a..9577e72e 100644 --- a/include/intx/int128.hpp +++ b/include/intx/int128.hpp @@ -659,55 +659,41 @@ inline div_result udivrem(uint128 x, uint128 y) noexcept { INTX_REQUIRE(y.lo != 0); // Division by 0. - uint64_t xn_ex, xn_hi, xn_lo, yn; - - auto lsh = clz(y.lo); - if (lsh != 0) - { - auto rsh = 64 - lsh; - xn_ex = x.hi >> rsh; - xn_hi = (x.lo >> rsh) | (x.hi << lsh); - xn_lo = x.lo << lsh; - yn = y.lo << lsh; - } - else - { - xn_ex = 0; - xn_hi = x.hi; - xn_lo = x.lo; - yn = y.lo; - } - - auto v = reciprocal_2by1(yn); - - auto res = udivrem_2by1({xn_ex, xn_hi}, yn, v); - auto q1 = res.quot; - - res = udivrem_2by1({res.rem, xn_lo}, yn, v); - - return {{q1, res.quot}, res.rem >> lsh}; + const auto lsh = clz(y.lo); + const auto rsh = (64 - lsh) % 64; + const auto rsh_mask = uint64_t{lsh == 0} - 1; + + const auto yn = y.lo << lsh; + const auto xn_lo = x.lo << lsh; + const auto xn_hi = (x.hi << lsh) | ((x.lo >> rsh) & rsh_mask); + const auto xn_ex = (x.hi >> rsh) & rsh_mask; + + const auto v = reciprocal_2by1(yn); + const auto res1 = udivrem_2by1({xn_ex, xn_hi}, yn, v); + const auto res2 = udivrem_2by1({res1.rem, xn_lo}, yn, v); + return {{res1.quot, res2.quot}, res2.rem >> lsh}; } if (y.hi > x.hi) return {0, x}; - auto lsh = clz(y.hi); + const auto lsh = clz(y.hi); if (lsh == 0) { const auto q = unsigned{y.hi < x.hi} | unsigned{y.lo <= x.lo}; return {q, x - (q ? y : 0)}; } - auto rsh = 64 - lsh; + const auto rsh = 64 - lsh; - auto yn_lo = y.lo << lsh; - auto yn_hi = (y.lo >> rsh) | (y.hi << lsh); - auto xn_ex = x.hi >> rsh; - auto xn_hi = (x.lo >> rsh) | (x.hi << lsh); - auto xn_lo = x.lo << lsh; + const auto yn_lo = y.lo << lsh; + const auto yn_hi = (y.hi << lsh) | (y.lo >> rsh); + const auto xn_lo = x.lo << lsh; + const auto xn_hi = (x.hi << lsh) | (x.lo >> rsh); + const auto xn_ex = x.hi >> rsh; - auto v = reciprocal_3by2({yn_hi, yn_lo}); - auto res = udivrem_3by2(xn_ex, xn_hi, xn_lo, {yn_hi, yn_lo}, v); + const auto v = reciprocal_3by2({yn_hi, yn_lo}); + const auto res = udivrem_3by2(xn_ex, xn_hi, xn_lo, {yn_hi, yn_lo}, v); return {res.quot, res.rem >> lsh}; } diff --git a/test/benchmarks/bench_int128.cpp b/test/benchmarks/bench_int128.cpp index 492a01f2..6ffb4eab 100644 --- a/test/benchmarks/bench_int128.cpp +++ b/test/benchmarks/bench_int128.cpp @@ -30,6 +30,7 @@ static void udiv128(benchmark::State& state) { const uint128 inputs[][2] = { {0x537e3fbc5318dbc0e7e47d96b32ef2d5_u128, 0x395df916dfd1b5e38ae7c47ce8a620f_u128}, + {0x837e3fbc5318dbc0e7e47d96b32ef2d5_u128, 0x895df916dfd1b5e38ae7c47ce8a620f_u128}, {0xee657725ff64cd48b8fe188a09dc4f78_u128, 3}, // worst shift {0x0e657725ff64cd48b8fe188a09dc4f78_u128, 0xe7e47d96b32ef2d5}, // single long normalized {0x0e657725ff64cd48b8fe188a09dc4f78_u128, 0x77e47d96b32ef2d5}, // single long