Remove <typename UInt> from int_writer

Reduce code bloat by removing the multiple instantiations of int_writer based on the <typename UInt> parameter. Rationale: - The only function that gains a speedup from the int size is int_writer::on_dec()'s call to count_digits, which uses CLZ. Thus, to still take advantage of this speedup, we store the size of the int so we can use a switch statement to call the correct count_digits. - All other implementations of count_digits require some sort of looping that terminates when the value hits zero, regardless of the size of the int. Caveats: - There is a performance hit when dealing with and passing around 64-bit/128-bit values compared to 32-bit values on 32-bit platforms, and compared to 64-bit values on 64-bit systems. But this should not reduce performance dramatically. - There is also a performance hit for on_dec() due to the addition of a switch statement. But, due to its size, this should reduce to a jump table. Resolves #1778
fmtlib · Jul 17, 2020 · fc7dfd9 · fc7dfd9
1 parent f5d4215
commit fc7dfd9
Showing 1 changed file with 45 additions and 6 deletions.
diff --git a/include/fmt/format.h b/include/fmt/format.h
@@ -766,6 +766,12 @@ using uint32_or_64_or_128_t = conditional_t<
     std::numeric_limits<T>::digits <= 32, uint32_t,
     conditional_t<std::numeric_limits<T>::digits <= 64, uint64_t, uint128_t>>;
 
+// Selects between uint64_t and uint128_t based on how uint128_t is defined.
+// If the FMT_USE_INT128 macro is defined as 0, uint128_t is only 1 byte in
+// size, so the largest integer type that can be used is uint64_t.
+using uint_largest_t =
+    conditional_t<sizeof(uint128_t) < sizeof(uint64_t), uint64_t, uint128_t>;
+
 // Static data is placed in this class template for the header-only config.
 template <typename T = void> struct FMT_EXTERN_TEMPLATE_API basic_data {
   static const uint64_t powers_of_10_64[];
@@ -1491,11 +1497,21 @@ OutputIt write(OutputIt out, basic_string_view<StrChar> s,
 }
 
 // The handle_int_type_spec handler that writes an integer.
-template <typename OutputIt, typename Char, typename UInt> struct int_writer {
+template <typename OutputIt, typename Char> struct int_writer {
+  enum class int_bytes {
+    byte1 = sizeof(uint8_t),
+    byte2 = sizeof(uint16_t),
+    byte4 = sizeof(uint32_t),
+    byte8 = sizeof(uint64_t),
+    byte16 = 16,  // Must be directly set because uint128_t can be 16 bytes if
+                  // FMT_USE_INT128 == 1 and 1 byte if FMT_USE_INT128 == 0.
+  };
+
   OutputIt out;
   locale_ref locale;
   const basic_format_specs<Char>& specs;
-  UInt abs_value;
+  uint_largest_t abs_value;
+  int_bytes value_bytes;
   char prefix[4];
   unsigned prefix_size;
 
@@ -1510,9 +1526,11 @@ template <typename OutputIt, typename Char, typename UInt> struct int_writer {
       : out(output),
         locale(loc),
         specs(s),
-        abs_value(static_cast<UInt>(value)),
+        abs_value(static_cast<decltype(abs_value)>(value)),
+        value_bytes(int_bytes::byte8),
         prefix_size(0) {
-    static_assert(std::is_same<uint32_or_64_or_128_t<Int>, UInt>::value, "");
+    value_bytes = static_cast<int_bytes>(sizeof(value));
+
     if (is_negative(value)) {
       prefix[0] = '-';
       ++prefix_size;
@@ -1524,7 +1542,28 @@ template <typename OutputIt, typename Char, typename UInt> struct int_writer {
   }
 
   void on_dec() {
-    auto num_digits = count_digits(abs_value);
+    int num_digits = 0;
+
+    switch (value_bytes) {
+    case int_bytes::byte1:
+    case int_bytes::byte2:
+    case int_bytes::byte4:
+      num_digits = count_digits(static_cast<uint32_t>(abs_value));
+      break;
+    case int_bytes::byte8:
+      num_digits = count_digits(static_cast<uint64_t>(abs_value));
+      break;
+#if !FMT_USE_INT128
+    case int_bytes::byte16:
+      num_digits = count_digits(static_cast<uint64_t>(abs_value));
+      break;
+#else
+    case int_bytes::byte16:
+      num_digits = count_digits(static_cast<uint128_t>(abs_value));
+      break;
+#endif
+    }
+
     out = write_int(
         out, num_digits, get_prefix(), specs, [this, num_digits](iterator it) {
           return format_decimal<Char>(it, abs_value, num_digits).end;
@@ -1865,7 +1904,7 @@ class arg_formatter_base {
 
   template <typename T> void write_int(T value, const format_specs& spec) {
     using uint_type = uint32_or_64_or_128_t<T>;
-    int_writer<iterator, Char, uint_type> w(out_, locale_, value, spec);
+    int_writer<iterator, Char> w(out_, locale_, value, spec);
     handle_int_type_spec(spec.type, w);
     out_ = w.out;
   }