diff --git a/include/nlohmann/detail/input/input_adapters.hpp b/include/nlohmann/detail/input/input_adapters.hpp index f8155faa9d..51d2a373be 100644 --- a/include/nlohmann/detail/input/input_adapters.hpp +++ b/include/nlohmann/detail/input/input_adapters.hpp @@ -142,136 +142,142 @@ class wide_string_input_adapter : public input_adapter_protocol template void fill_buffer() { - fill_buffer_utf32(); - } - - template<> - void fill_buffer<2>() - { - fill_buffer_utf16(); + wide_string_input_helper::fill_buffer(str, current_wchar, utf8_bytes, utf8_bytes_index, utf8_bytes_filled); } - void fill_buffer_utf16() - { - utf8_bytes_index = 0; + /// the wstring to process + const WideStringType& str; - if (current_wchar == str.size()) - { - utf8_bytes[0] = std::char_traits::eof(); - utf8_bytes_filled = 1; - } - else + /// index of the current wchar in str + std::size_t current_wchar = 0; + + /// a buffer for UTF-8 bytes + std::array::int_type, 4> utf8_bytes = {{0, 0, 0, 0}}; + + /// index to the utf8_codes array for the next valid byte + std::size_t utf8_bytes_index = 0; + /// number of valid bytes in the utf8_codes array + std::size_t utf8_bytes_filled = 0; +}; + +namespace +{ + template + struct wide_string_input_helper + { + // UTF-32 + static void fill_buffer(const WideStringType& str, size_t& current_wchar, std::array::int_type, 4>& utf8_bytes, size_t& utf8_bytes_index, size_t& utf8_bytes_filled) { - // get the current character - const int wc = static_cast(str[current_wchar++]); + utf8_bytes_index = 0; - // UTF-16 to UTF-8 encoding - if (wc < 0x80) + if (current_wchar == str.size()) { - utf8_bytes[0] = wc; + utf8_bytes[0] = std::char_traits::eof(); utf8_bytes_filled = 1; } - else if (wc <= 0x7FF) - { - utf8_bytes[0] = 0xC0 | ((wc >> 6)); - utf8_bytes[1] = 0x80 | (wc & 0x3F); - utf8_bytes_filled = 2; - } - else if (0xD800 > wc or wc >= 0xE000) - { - utf8_bytes[0] = 0xE0 | ((wc >> 12)); - utf8_bytes[1] = 0x80 | ((wc >> 6) & 0x3F); - utf8_bytes[2] = 0x80 | (wc & 0x3F); - utf8_bytes_filled = 3; - } else { - if (current_wchar < str.size()) + // get the current character + const int wc = static_cast(str[current_wchar++]); + + // UTF-32 to UTF-8 encoding + if (wc < 0x80) { - const int wc2 = static_cast(str[current_wchar++]); - const int charcode = 0x10000 + (((wc & 0x3FF) << 10) | (wc2 & 0x3FF)); - utf8_bytes[0] = 0xf0 | (charcode >> 18); - utf8_bytes[1] = 0x80 | ((charcode >> 12) & 0x3F); - utf8_bytes[2] = 0x80 | ((charcode >> 6) & 0x3F); - utf8_bytes[3] = 0x80 | (charcode & 0x3F); + utf8_bytes[0] = wc; + utf8_bytes_filled = 1; + } + else if (wc <= 0x7FF) + { + utf8_bytes[0] = 0xC0 | ((wc >> 6) & 0x1F); + utf8_bytes[1] = 0x80 | (wc & 0x3F); + utf8_bytes_filled = 2; + } + else if (wc <= 0xFFFF) + { + utf8_bytes[0] = 0xE0 | ((wc >> 12) & 0x0F); + utf8_bytes[1] = 0x80 | ((wc >> 6) & 0x3F); + utf8_bytes[2] = 0x80 | (wc & 0x3F); + utf8_bytes_filled = 3; + } + else if (wc <= 0x10FFFF) + { + utf8_bytes[0] = 0xF0 | ((wc >> 18) & 0x07); + utf8_bytes[1] = 0x80 | ((wc >> 12) & 0x3F); + utf8_bytes[2] = 0x80 | ((wc >> 6) & 0x3F); + utf8_bytes[3] = 0x80 | (wc & 0x3F); utf8_bytes_filled = 4; } else { // unknown character - ++current_wchar; utf8_bytes[0] = wc; utf8_bytes_filled = 1; } } } - } + }; - void fill_buffer_utf32() + template + struct wide_string_input_helper { - utf8_bytes_index = 0; - - if (current_wchar == str.size()) - { - utf8_bytes[0] = std::char_traits::eof(); - utf8_bytes_filled = 1; - } - else + // UTF-16 + static void fill_buffer(const WideStringType& str, size_t& current_wchar, std::array::int_type, 4>& utf8_bytes, size_t& utf8_bytes_index, size_t& utf8_bytes_filled) { - // get the current character - const int wc = static_cast(str[current_wchar++]); + utf8_bytes_index = 0; - // UTF-32 to UTF-8 encoding - if (wc < 0x80) + if (current_wchar == str.size()) { - utf8_bytes[0] = wc; + utf8_bytes[0] = std::char_traits::eof(); utf8_bytes_filled = 1; } - else if (wc <= 0x7FF) - { - utf8_bytes[0] = 0xC0 | ((wc >> 6) & 0x1F); - utf8_bytes[1] = 0x80 | (wc & 0x3F); - utf8_bytes_filled = 2; - } - else if (wc <= 0xFFFF) - { - utf8_bytes[0] = 0xE0 | ((wc >> 12) & 0x0F); - utf8_bytes[1] = 0x80 | ((wc >> 6) & 0x3F); - utf8_bytes[2] = 0x80 | (wc & 0x3F); - utf8_bytes_filled = 3; - } - else if (wc <= 0x10FFFF) - { - utf8_bytes[0] = 0xF0 | ((wc >> 18 ) & 0x07); - utf8_bytes[1] = 0x80 | ((wc >> 12) & 0x3F); - utf8_bytes[2] = 0x80 | ((wc >> 6) & 0x3F); - utf8_bytes[3] = 0x80 | (wc & 0x3F); - utf8_bytes_filled = 4; - } else { - // unknown character - utf8_bytes[0] = wc; - utf8_bytes_filled = 1; + // get the current character + const int wc = static_cast(str[current_wchar++]); + + // UTF-16 to UTF-8 encoding + if (wc < 0x80) + { + utf8_bytes[0] = wc; + utf8_bytes_filled = 1; + } + else if (wc <= 0x7FF) + { + utf8_bytes[0] = 0xC0 | ((wc >> 6)); + utf8_bytes[1] = 0x80 | (wc & 0x3F); + utf8_bytes_filled = 2; + } + else if (0xD800 > wc or wc >= 0xE000) + { + utf8_bytes[0] = 0xE0 | ((wc >> 12)); + utf8_bytes[1] = 0x80 | ((wc >> 6) & 0x3F); + utf8_bytes[2] = 0x80 | (wc & 0x3F); + utf8_bytes_filled = 3; + } + else + { + if (current_wchar < str.size()) + { + const int wc2 = static_cast(str[current_wchar++]); + const int charcode = 0x10000 + (((wc & 0x3FF) << 10) | (wc2 & 0x3FF)); + utf8_bytes[0] = 0xf0 | (charcode >> 18); + utf8_bytes[1] = 0x80 | ((charcode >> 12) & 0x3F); + utf8_bytes[2] = 0x80 | ((charcode >> 6) & 0x3F); + utf8_bytes[3] = 0x80 | (charcode & 0x3F); + utf8_bytes_filled = 4; + } + else + { + // unknown character + ++current_wchar; + utf8_bytes[0] = wc; + utf8_bytes_filled = 1; + } + } } } - } - - private: - /// the wstring to process - const WideStringType& str; - - /// index of the current wchar in str - std::size_t current_wchar = 0; - - /// a buffer for UTF-8 bytes - std::array::int_type, 4> utf8_bytes = {{0, 0, 0, 0}}; - - /// index to the utf8_codes array for the next valid byte - std::size_t utf8_bytes_index = 0; - /// number of valid bytes in the utf8_codes array - std::size_t utf8_bytes_filled = 0; -}; + }; +} class input_adapter { diff --git a/single_include/nlohmann/json.hpp b/single_include/nlohmann/json.hpp index c19ee6c9ef..c9132fa221 100644 --- a/single_include/nlohmann/json.hpp +++ b/single_include/nlohmann/json.hpp @@ -2020,136 +2020,142 @@ class wide_string_input_adapter : public input_adapter_protocol template void fill_buffer() { - fill_buffer_utf32(); - } - - template<> - void fill_buffer<2>() - { - fill_buffer_utf16(); + wide_string_input_helper::fill_buffer(str, current_wchar, utf8_bytes, utf8_bytes_index, utf8_bytes_filled); } - void fill_buffer_utf16() - { - utf8_bytes_index = 0; + /// the wstring to process + const WideStringType& str; - if (current_wchar == str.size()) - { - utf8_bytes[0] = std::char_traits::eof(); - utf8_bytes_filled = 1; - } - else + /// index of the current wchar in str + std::size_t current_wchar = 0; + + /// a buffer for UTF-8 bytes + std::array::int_type, 4> utf8_bytes = {{0, 0, 0, 0}}; + + /// index to the utf8_codes array for the next valid byte + std::size_t utf8_bytes_index = 0; + /// number of valid bytes in the utf8_codes array + std::size_t utf8_bytes_filled = 0; +}; + +namespace +{ + template + struct wide_string_input_helper + { + // UTF-32 + static void fill_buffer(const WideStringType& str, size_t& current_wchar, std::array::int_type, 4>& utf8_bytes, size_t& utf8_bytes_index, size_t& utf8_bytes_filled) { - // get the current character - const int wc = static_cast(str[current_wchar++]); + utf8_bytes_index = 0; - // UTF-16 to UTF-8 encoding - if (wc < 0x80) + if (current_wchar == str.size()) { - utf8_bytes[0] = wc; + utf8_bytes[0] = std::char_traits::eof(); utf8_bytes_filled = 1; } - else if (wc <= 0x7FF) - { - utf8_bytes[0] = 0xC0 | ((wc >> 6)); - utf8_bytes[1] = 0x80 | (wc & 0x3F); - utf8_bytes_filled = 2; - } - else if (0xD800 > wc or wc >= 0xE000) - { - utf8_bytes[0] = 0xE0 | ((wc >> 12)); - utf8_bytes[1] = 0x80 | ((wc >> 6) & 0x3F); - utf8_bytes[2] = 0x80 | (wc & 0x3F); - utf8_bytes_filled = 3; - } else { - if (current_wchar < str.size()) + // get the current character + const int wc = static_cast(str[current_wchar++]); + + // UTF-32 to UTF-8 encoding + if (wc < 0x80) + { + utf8_bytes[0] = wc; + utf8_bytes_filled = 1; + } + else if (wc <= 0x7FF) { - const int wc2 = static_cast(str[current_wchar++]); - const int charcode = 0x10000 + (((wc & 0x3FF) << 10) | (wc2 & 0x3FF)); - utf8_bytes[0] = 0xf0 | (charcode >> 18); - utf8_bytes[1] = 0x80 | ((charcode >> 12) & 0x3F); - utf8_bytes[2] = 0x80 | ((charcode >> 6) & 0x3F); - utf8_bytes[3] = 0x80 | (charcode & 0x3F); + utf8_bytes[0] = 0xC0 | ((wc >> 6) & 0x1F); + utf8_bytes[1] = 0x80 | (wc & 0x3F); + utf8_bytes_filled = 2; + } + else if (wc <= 0xFFFF) + { + utf8_bytes[0] = 0xE0 | ((wc >> 12) & 0x0F); + utf8_bytes[1] = 0x80 | ((wc >> 6) & 0x3F); + utf8_bytes[2] = 0x80 | (wc & 0x3F); + utf8_bytes_filled = 3; + } + else if (wc <= 0x10FFFF) + { + utf8_bytes[0] = 0xF0 | ((wc >> 18) & 0x07); + utf8_bytes[1] = 0x80 | ((wc >> 12) & 0x3F); + utf8_bytes[2] = 0x80 | ((wc >> 6) & 0x3F); + utf8_bytes[3] = 0x80 | (wc & 0x3F); utf8_bytes_filled = 4; } else { // unknown character - ++current_wchar; utf8_bytes[0] = wc; utf8_bytes_filled = 1; } } } - } + }; - void fill_buffer_utf32() + template + struct wide_string_input_helper { - utf8_bytes_index = 0; - - if (current_wchar == str.size()) - { - utf8_bytes[0] = std::char_traits::eof(); - utf8_bytes_filled = 1; - } - else + // UTF-16 + static void fill_buffer(const WideStringType& str, size_t& current_wchar, std::array::int_type, 4>& utf8_bytes, size_t& utf8_bytes_index, size_t& utf8_bytes_filled) { - // get the current character - const int wc = static_cast(str[current_wchar++]); + utf8_bytes_index = 0; - // UTF-32 to UTF-8 encoding - if (wc < 0x80) + if (current_wchar == str.size()) { - utf8_bytes[0] = wc; + utf8_bytes[0] = std::char_traits::eof(); utf8_bytes_filled = 1; } - else if (wc <= 0x7FF) - { - utf8_bytes[0] = 0xC0 | ((wc >> 6) & 0x1F); - utf8_bytes[1] = 0x80 | (wc & 0x3F); - utf8_bytes_filled = 2; - } - else if (wc <= 0xFFFF) - { - utf8_bytes[0] = 0xE0 | ((wc >> 12) & 0x0F); - utf8_bytes[1] = 0x80 | ((wc >> 6) & 0x3F); - utf8_bytes[2] = 0x80 | (wc & 0x3F); - utf8_bytes_filled = 3; - } - else if (wc <= 0x10FFFF) - { - utf8_bytes[0] = 0xF0 | ((wc >> 18 ) & 0x07); - utf8_bytes[1] = 0x80 | ((wc >> 12) & 0x3F); - utf8_bytes[2] = 0x80 | ((wc >> 6) & 0x3F); - utf8_bytes[3] = 0x80 | (wc & 0x3F); - utf8_bytes_filled = 4; - } else { - // unknown character - utf8_bytes[0] = wc; - utf8_bytes_filled = 1; + // get the current character + const int wc = static_cast(str[current_wchar++]); + + // UTF-16 to UTF-8 encoding + if (wc < 0x80) + { + utf8_bytes[0] = wc; + utf8_bytes_filled = 1; + } + else if (wc <= 0x7FF) + { + utf8_bytes[0] = 0xC0 | ((wc >> 6)); + utf8_bytes[1] = 0x80 | (wc & 0x3F); + utf8_bytes_filled = 2; + } + else if (0xD800 > wc or wc >= 0xE000) + { + utf8_bytes[0] = 0xE0 | ((wc >> 12)); + utf8_bytes[1] = 0x80 | ((wc >> 6) & 0x3F); + utf8_bytes[2] = 0x80 | (wc & 0x3F); + utf8_bytes_filled = 3; + } + else + { + if (current_wchar < str.size()) + { + const int wc2 = static_cast(str[current_wchar++]); + const int charcode = 0x10000 + (((wc & 0x3FF) << 10) | (wc2 & 0x3FF)); + utf8_bytes[0] = 0xf0 | (charcode >> 18); + utf8_bytes[1] = 0x80 | ((charcode >> 12) & 0x3F); + utf8_bytes[2] = 0x80 | ((charcode >> 6) & 0x3F); + utf8_bytes[3] = 0x80 | (charcode & 0x3F); + utf8_bytes_filled = 4; + } + else + { + // unknown character + ++current_wchar; + utf8_bytes[0] = wc; + utf8_bytes_filled = 1; + } + } } } - } - - private: - /// the wstring to process - const WideStringType& str; - - /// index of the current wchar in str - std::size_t current_wchar = 0; - - /// a buffer for UTF-8 bytes - std::array::int_type, 4> utf8_bytes = {{0, 0, 0, 0}}; - - /// index to the utf8_codes array for the next valid byte - std::size_t utf8_bytes_index = 0; - /// number of valid bytes in the utf8_codes array - std::size_t utf8_bytes_filled = 0; -}; + }; +} class input_adapter {