diff --git a/externals/coda-oss/ReleaseNotes.md b/externals/coda-oss/ReleaseNotes.md index f75afd646..8838f1089 100644 --- a/externals/coda-oss/ReleaseNotes.md +++ b/externals/coda-oss/ReleaseNotes.md @@ -11,7 +11,13 @@ ``` # coda-oss Release Notes -## Release 2022-06-29 +## [Release 2022-08-02](https://github.com/mdaus/coda-oss/releases/tag/2022-08-02) +* remove *Expat* and *libXML* modules and support in **xml.lite**; only *Xerces* was actively used. +* fix `waf` to work-around FIPS warning because of insecure *md5* hashing. +* tweak `str::EncodedStringView` and `str::EncodedString` for + [future XML changes](https://github.com/mdaus/coda-oss/tree/feature/always-write-xml-as-utf8). + +## [Release 2022-06-29](https://github.com/mdaus/coda-oss/releases/tag/2022-06-29) * remove **modules/drivers/boost** as it was empty (and unused); **modules/c++/serialize** depended on boost, so it has also been removed. * Update to [zlib 1.2.12](https://www.zlib.net/zlib-1.2.12.tar.gz), @@ -21,14 +27,14 @@ * Begin work on `CODA_OSS_API` (needed for building a shared-library/DLL) * Add `run1D()` method to `mt::GenerationThreadPool` -## Release 2022-05-03 +## [Release 2022-05-03](https://github.com/mdaus/coda-oss/releases/tag/2022-05-03) * Fixed a bug in `Poly2D::atY()`; improved `flipXY()` behavior. * Implement [std::filesystem::file_size()](https://en.cppreference.com/w/cpp/filesystem/file_size). * use `inline` functions for `TEST_` macros * force use of [64-bit `time_t`](https://en.wikipedia.org/wiki/Year_2038_problem) * more routines now support a `std::span` overload; e.g., `io::InputStream::read()`. 
-## (Release 2022-02-22) +## [Release 2022-02-22](https://github.com/mdaus/coda-oss/releases/tag/2022-02-22) * new `EnocdedString` and `EncodedStringView` to manage strings in different encodings * XML containing UTF-8 characters can now be validated * Update to [GSL 4.0.0](https://github.com/microsoft/GSL/releases/tag/v4.0.0) diff --git a/externals/coda-oss/modules/c++/config/include/config/Version.h b/externals/coda-oss/modules/c++/config/include/config/Version.h index 75bb473d9..0bb1009fc 100644 --- a/externals/coda-oss/modules/c++/config/include/config/Version.h +++ b/externals/coda-oss/modules/c++/config/include/config/Version.h @@ -42,12 +42,12 @@ static_assert(CODA_OSS_MAKE_VERSION_MMPB(9999, 9999, 9999, 9999) <= UINT64_MAX, // Do this ala C++ ... we don't currently have major/minor/patch //#define CODA_OSS_VERSION_ 20210910L // c.f. __cplusplus -#define CODA_OSS_VERSION_ 2022 ## 0006 ## 0029 ## 0000 ## L +#define CODA_OSS_VERSION_ 2022 ## 0008 ## 0002 ## 0000 ## L // Use the same macros other projects might want to use; overkill for us. #define CODA_OSS_VERSION_MAJOR 2022 -#define CODA_OSS_VERSION_MINOR 6 -#define CODA_OSS_VERSION_PATCH 29 +#define CODA_OSS_VERSION_MINOR 8 +#define CODA_OSS_VERSION_PATCH 2 #define CODA_OSS_VERSION_BUILD 0 #define CODA_OSS_VERSION CODA_OSS_MAKE_VERSION_MMPB(CODA_OSS_VERSION_MAJOR, CODA_OSS_VERSION_MINOR, CODA_OSS_VERSION_PATCH, CODA_OSS_VERSION_BUILD) diff --git a/externals/coda-oss/modules/c++/io/include/io/ReadUtils.h b/externals/coda-oss/modules/c++/io/include/io/ReadUtils.h index 7bf079323..4d16a06d9 100644 --- a/externals/coda-oss/modules/c++/io/include/io/ReadUtils.h +++ b/externals/coda-oss/modules/c++/io/include/io/ReadUtils.h @@ -42,7 +42,7 @@ namespace io */ void readFileContents(const std::string& pathname, std::vector& buffer); -void readFileContents(const sys::filesystem::path& pathname, std::vector& buffer); +void readFileContents(const coda_oss::filesystem::path& pathname, std::vector& buffer); /*! 
* Reads the contents of a file into a string. The file is assumed to be a diff --git a/externals/coda-oss/modules/c++/io/source/ReadUtils.cpp b/externals/coda-oss/modules/c++/io/source/ReadUtils.cpp index 4f83c7e0a..b85fbfdd5 100644 --- a/externals/coda-oss/modules/c++/io/source/ReadUtils.cpp +++ b/externals/coda-oss/modules/c++/io/source/ReadUtils.cpp @@ -40,7 +40,7 @@ void readFileContents(const std::string& pathname, { readFileContents_(pathname, buffer); } -void readFileContents(const sys::filesystem::path& pathname, std::vector& buffer) +void readFileContents(const coda_oss::filesystem::path& pathname, std::vector& buffer) { readFileContents_(pathname, buffer); } diff --git a/externals/coda-oss/modules/c++/str/include/str/EncodedString.h b/externals/coda-oss/modules/c++/str/include/str/EncodedString.h index b726225cb..0dd6655d2 100644 --- a/externals/coda-oss/modules/c++/str/include/str/EncodedString.h +++ b/externals/coda-oss/modules/c++/str/include/str/EncodedString.h @@ -52,18 +52,13 @@ class CODA_OSS_API EncodedString final // We can do most everything through the view, so keep one around. EncodedStringView v_; - const std::string& string() const - { - return s_; - } - // No "public" operator=() for these; this class is mostly for storage and/or conversion, // not extensive manipulation. Create a new instance and assign/move that. 
void assign(coda_oss::u8string::const_pointer); void assign(str::W1252string::const_pointer); public: - EncodedString() = default; + EncodedString(); ~EncodedString() = default; EncodedString(const EncodedString&); EncodedString& operator=(const EncodedString&); @@ -134,7 +129,7 @@ class CODA_OSS_API EncodedString final { static const std::string& string(const EncodedString& es) // for unit-testing { - return es.string(); + return es.s_; } }; }; diff --git a/externals/coda-oss/modules/c++/str/include/str/EncodedStringView.h b/externals/coda-oss/modules/c++/str/include/str/EncodedStringView.h index 6c0b98350..5f5056f74 100644 --- a/externals/coda-oss/modules/c++/str/include/str/EncodedStringView.h +++ b/externals/coda-oss/modules/c++/str/include/str/EncodedStringView.h @@ -52,7 +52,11 @@ class CODA_OSS_API EncodedStringView final // Since we only support two encodings--UTF-8 (native on Linux) and Windows-1252 // (native on Windows)--both of which are 8-bits, a simple "bool" flag will do. coda_oss::span mString; - static constexpr bool mNativeIsUtf8 = details::Platform == details::PlatformType::Linux ? true : false; + #if _WIN32 + static constexpr bool mNativeIsUtf8 = false; // Windows-1252 + #else + static constexpr bool mNativeIsUtf8 = true; // !_WIN32, assume Linux + #endif bool mIsUtf8 = mNativeIsUtf8; // Want to create an EncodedString from EncodedStringView. The public interface @@ -89,7 +93,6 @@ class CODA_OSS_API EncodedStringView final // Convert (perhaps) whatever we're looking at to UTF-8 coda_oss::u8string u8string() const; // c.f. std::filesystem::path::u8string() - std::string& toUtf8(std::string&) const; // std::string is encoded as UTF-8, always. // Convert whatever we're looking at to UTF-16 or UTF-32 std::u16string u16string() const; // c.f. std::filesystem::path::u8string() @@ -111,34 +114,34 @@ class CODA_OSS_API EncodedStringView final { return mIsUtf8 ? 
cast(c_str()) : nullptr; } + str::W1252string::const_pointer c_w1252str() const + { + return mIsUtf8 ? nullptr : cast(c_str()); + } size_t size() const { return mString.size(); } // Input is encoded as specified on all platforms. - static EncodedStringView fromUtf8(const std::string& s) + static EncodedStringView fromUtf8(const std::string& utf8) { - return EncodedStringView(str::c_str(s)); + return EncodedStringView(str::c_str(utf8)); } - static EncodedStringView fromUtf8(std::string::const_pointer p) + static EncodedStringView fromUtf8(std::string::const_pointer pUtf8) { - return EncodedStringView(str::cast(p)); + return EncodedStringView(str::cast(pUtf8)); } - static EncodedStringView fromWindows1252(const std::string& s) + static EncodedStringView fromWindows1252(const std::string& w1252) { - return EncodedStringView(str::c_str(s)); + return EncodedStringView(str::c_str(w1252)); } - static EncodedStringView fromWindows1252(std::string::const_pointer p) + static EncodedStringView fromWindows1252(std::string::const_pointer pW1252) { - return EncodedStringView(str::cast(p)); + return EncodedStringView(str::cast(pW1252)); } - std::string asUtf8() const - { - std::string retval; - return toUtf8(retval); - } + std::string asUtf8() const; std::string asWindows1252() const; bool operator_eq(const EncodedStringView&) const; diff --git a/externals/coda-oss/modules/c++/str/include/str/Encoding.h b/externals/coda-oss/modules/c++/str/include/str/Encoding.h index 0cace45b2..8fcdeb5c4 100644 --- a/externals/coda-oss/modules/c++/str/include/str/Encoding.h +++ b/externals/coda-oss/modules/c++/str/include/str/Encoding.h @@ -37,27 +37,6 @@ #include "gsl/gsl.h" #include "config/Exports.h" -// This can be useful for code that will compile on all platforms, but needs -// different platform-specific behavior. This avoids the use of more #ifdefs -// (no preprocessor) and also squelches compiler-warnings about unused local -// functions. 
-namespace str { namespace details // YOU should be using sys::PlatformType -{ -enum class PlatformType -{ - Windows, - Linux, - // MacOS -}; - -#if _WIN32 -constexpr auto Platform = PlatformType::Windows; -#else -constexpr auto Platform = PlatformType::Linux; -#endif -} } - - namespace str { template @@ -83,17 +62,6 @@ inline typename TBasicStringT::const_pointer c_str(const std::basic_string; // https://en.cppreference.com/w/cpp/string -CODA_OSS_API coda_oss::u8string fromWindows1252(std::string::const_pointer, size_t); // std::string is Windows-1252 **ON ALL PLATFORMS** -inline coda_oss::u8string fromWindows1252(std::string::const_pointer s) -{ - return fromWindows1252(s, gsl::narrow(strlen(s))); -} -CODA_OSS_API coda_oss::u8string fromUtf8(std::string::const_pointer, size_t); // std::string is UTF-8 **ON ALL PLATFORMS** -inline coda_oss::u8string fromUtf8(std::string::const_pointer s) -{ - return fromUtf8(s, gsl::narrow(strlen(s))); -} - // With some older C++ compilers, uint16_t may be used instead of char16_t :-( using ui16string = std::basic_string; // ui = UInt16_t @@ -110,76 +78,39 @@ static_assert(!std::is_same::value, "wchar_t should not be the // When the encoding is important, we want to "traffic" in coda_oss::u8string (UTF-8), not // str::W1252string (Windows-1252) or std::string (unknown). Make it easy to get those from other encodings. -CODA_OSS_API coda_oss::u8string to_u8string(std::string::const_pointer, size_t); // std::string is Windows-1252 or UTF-8 depending on platform CODA_OSS_API coda_oss::u8string to_u8string(str::W1252string::const_pointer, size_t); inline coda_oss::u8string to_u8string(coda_oss::u8string::const_pointer s, size_t sz) { return coda_oss::u8string(s, sz); } -CODA_OSS_API coda_oss::u8string to_u8string(std::wstring::const_pointer, size_t); // std::wstring is UTF-16 or UTF-32 depending on platform -// UTF-16 is typically uses on Windows (where it is std::wstring::value_type); -// Linux preferred UTF-32. 
+// UTF-16 is typically used on Windows (where it is std::wstring::value_type); Linux prefers UTF-32. CODA_OSS_API coda_oss::u8string to_u8string(std::u16string::const_pointer, size_t); + CODA_OSS_API std::u16string to_u16string(coda_oss::u8string::const_pointer, size_t); +str::ui16string to_ui16string(coda_oss::u8string::const_pointer, size_t); +std::u16string to_u16string(str::W1252string::const_pointer, size_t); +str::ui16string to_ui16string(str::W1252string::const_pointer, size_t); // UTF-32 is convenient because each code-point is a single 32-bit integer. // It's typically std::wstring::value_type on Linux, but NOT Windows. CODA_OSS_API coda_oss::u8string to_u8string(std::u32string::const_pointer, size_t); CODA_OSS_API std::u32string to_u32string(coda_oss::u8string::const_pointer, size_t); +std::u32string to_u32string(str::W1252string::const_pointer, size_t); template inline coda_oss::u8string to_u8string(const std::basic_string& s) { return to_u8string(s.c_str(), s.size()); } -template -inline std::u16string to_u16string(const std::basic_string& s) -{ - return to_u16string(s.c_str(), s.size()); -} -template -inline std::u32string to_u32string(const std::basic_string& s) -{ - return to_u32string(s.c_str(), s.size()); -} -namespace details // YOU should use EncodedStringView -{ -coda_oss::u8string to_u8string(std::string::const_pointer, size_t, bool is_utf8 /* is 's' UTF-8? */); -std::u16string to_u16string(std::string::const_pointer, size_t, bool is_utf8 /* is 's' UTF-8? */); -ui16string to_ui16string(std::string::const_pointer, size_t, bool is_utf8 /* is 's' UTF-8? */); // older C++ compilers -std::u32string to_u32string(std::string::const_pointer, size_t, bool is_utf8 /* is 's' UTF-8? */); -std::wstring to_wstring(std::string::const_pointer, size_t, bool is_utf8 /* is 's' UTF-8? */); - -std::string& to_u8string(std::string::const_pointer, size_t, bool is_utf8 /* is 's' UTF-8? 
*/, std::string&); // encoding is lost -std::string& to_u8string(std::u16string::const_pointer, size_t, std::string&); // encoding is lost -std::string& to_u8string(std::u32string::const_pointer, size_t, std::string&); // encoding is lost - -str::W1252string to_w1252string(std::string::const_pointer, size_t); // std::string is Windows-1252 or UTF-8 depending on platform -str::W1252string to_w1252string(std::string::const_pointer, size_t, bool is_utf8 /* is 's' UTF-8? */); -CODA_OSS_API str::W1252string to_w1252string(coda_oss::u8string::const_pointer, size_t); -inline str::W1252string to_w1252string(str::W1252string::const_pointer s, size_t sz) -{ - return str::W1252string(s, sz); -} +CODA_OSS_API str::W1252string to_w1252string(coda_oss::u8string::const_pointer p, size_t sz); -std::string to_native(coda_oss::u8string::const_pointer, size_t); // std::string is Windows-1252 or UTF-8 depending on platform -std::string to_native(str::W1252string::const_pointer s, size_t sz); // std::string is Windows-1252 or UTF-8 depending on platform -inline std::string to_native(std::string::const_pointer s, size_t sz, bool is_utf8 /* is 's' UTF-8? */) // std::string is Windows-1252 or UTF-8 depending on platform -{ - return is_utf8 ? 
to_native(cast(s), sz) - : to_native(cast(s), sz); -} -inline std::string to_native(std::string::const_pointer s, size_t sz) -{ - return std::string(s, sz); -} -template -inline std::string to_native(const std::basic_string& s) +namespace details // YOU should use EncodedStringView { - return to_native(s.c_str(), s.size()); -} +void w1252to8(str::W1252string::const_pointer p, size_t sz, std::string&); // encoding is lost +void utf16to8(std::u16string::const_pointer, size_t, std::string&); // encoding is lost +void utf8to1252(coda_oss::u8string::const_pointer p, size_t sz, std::string&); // encoding is lost } } diff --git a/externals/coda-oss/modules/c++/str/source/EncodedString.cpp b/externals/coda-oss/modules/c++/str/source/EncodedString.cpp index c87d46093..1a43d926f 100644 --- a/externals/coda-oss/modules/c++/str/source/EncodedString.cpp +++ b/externals/coda-oss/modules/c++/str/source/EncodedString.cpp @@ -30,7 +30,7 @@ void str::EncodedString::assign(coda_oss::u8string::const_pointer s) using char_t = std::remove_pointer::type; // avoid copy-paste error using string_t = std::basic_string::type>; s_ = cast(s); // copy - v_ = EncodedStringView(c_str(s_)); + v_ = EncodedStringView(str::c_str(s_)); } void str::EncodedString::assign(str::W1252string::const_pointer s) @@ -38,7 +38,7 @@ void str::EncodedString::assign(str::W1252string::const_pointer s) using char_t = std::remove_pointer::type; // avoid copy-paste error using string_t = std::basic_string::type>; s_ = cast(s); // copy - v_ = EncodedStringView(c_str(s_)); // avoid copy-paste error + v_ = EncodedStringView(str::c_str(s_)); // avoid copy-paste error } static str::EncodedStringView make_EncodedStringView(const std::string& s, bool isUtf8) @@ -52,8 +52,9 @@ static str::EncodedStringView make_EncodedStringView(const std::string& s, bool return str::EncodedStringView(str::c_str(s)); } -str::EncodedString::EncodedString(std::string::const_pointer s) : s_(s) /*copy*/, v_ (s_) { } 
-str::EncodedString::EncodedString(const std::string& s) : s_(s) /*copy*/, v_ (s_) { } +str::EncodedString::EncodedString(std::string::const_pointer s) : s_(s) /*copy*/, v_(s_) { } +str::EncodedString::EncodedString(const std::string& s) : s_(s) /*copy*/, v_(s_) { } +str::EncodedString::EncodedString() : EncodedString(""){ } str::EncodedString::EncodedString(coda_oss::u8string::const_pointer s) { @@ -70,9 +71,21 @@ str::EncodedString::EncodedString(const str::W1252string& s) : EncodedString(s.c str::EncodedString::EncodedString(const std::u16string& s) : EncodedString(to_u8string(s)) { } str::EncodedString::EncodedString(const std::u32string& s) : EncodedString(to_u8string(s)) { } -str::EncodedString::EncodedString(std::wstring::const_pointer s) : EncodedString(to_u8string(s, wcslen(s))) { } -str::EncodedString::EncodedString(const std::wstring& s) : EncodedString(to_u8string(s)) { } +static inline coda_oss::u8string to_u8string_(std::wstring::const_pointer p_, size_t sz) // std::wstring is UTF-16 or UTF-32 depending on platform +{ + const auto p = + // Need to use #ifdef's because str::cast() checks to be sure the sizes are correct. 
+ #if _WIN32 + str::cast(p_); // std::wstring is UTF-16 on Windows + #endif + #if !_WIN32 + str::cast(p_); // std::wstring is UTF-32 on Linux + #endif + return str::to_u8string(p, sz); +} +str::EncodedString::EncodedString(std::wstring::const_pointer s) : EncodedString(to_u8string_(s, wcslen(s))) { } +str::EncodedString::EncodedString(const std::wstring& s) : EncodedString(to_u8string_(s.c_str(), s.size())) { } // create from a view str::EncodedString& str::EncodedString::operator=(const EncodedStringView& v) diff --git a/externals/coda-oss/modules/c++/str/source/EncodedStringView.cpp b/externals/coda-oss/modules/c++/str/source/EncodedStringView.cpp index 008c5cb73..bb3293dd5 100644 --- a/externals/coda-oss/modules/c++/str/source/EncodedStringView.cpp +++ b/externals/coda-oss/modules/c++/str/source/EncodedStringView.cpp @@ -34,6 +34,66 @@ #include "str/Encoding.h" #include "str/EncodedString.h" +enum class PlatformType +{ + Windows, + Linux, + // MacOS +}; + +#if _WIN32 +static auto Platform = PlatformType::Windows; +#else +static auto Platform = PlatformType::Linux; +#endif + +inline std::u16string to_u16string(std::string::const_pointer s, size_t sz, bool is_utf8 /* is 's' UTF-8? */) +{ + if (is_utf8) + { + return str::to_u16string(str::cast(s), sz); + } + return str::to_u16string(str::cast(s), sz); +} +inline str::ui16string to_ui16string(std::string::const_pointer s, size_t sz, bool is_utf8 /* is 's' UTF-8? 
*/) +{ + if (is_utf8) + { + return str::to_ui16string(str::cast(s), sz); + } + return str::to_ui16string(str::cast(s), sz); +} + +static std::string to_native(coda_oss::u8string::const_pointer p, size_t sz) +{ + if (Platform == PlatformType::Windows) + { + std::string retval; + str::details::utf8to1252(p, sz, retval); + return retval; + } + if (Platform == PlatformType::Linux) + { + return str::cast(p); // copy + } + throw std::logic_error("Unknown platform."); +} + +static std::string to_native(str::W1252string::const_pointer p, size_t sz) +{ + if (Platform == PlatformType::Windows) + { + return str::cast(p); // copy + } + if (Platform == PlatformType::Linux) + { + std::string retval; + str::details::w1252to8(p, sz, retval); + return retval; + } + throw std::logic_error("Unknown platform."); +} + template inline coda_oss::span make_span(const CharT* s) { @@ -56,40 +116,66 @@ str::EncodedStringView::EncodedStringView(const str::W1252string& s) : mString(m std::string str::EncodedStringView::native() const { - return str::details::to_native(mString.data(), mString.size(), mIsUtf8); + const auto s = mString.data(); + const auto sz = mString.size(); + return mIsUtf8 ? to_native(str::cast(s), sz) + : to_native(str::cast(s), sz); } coda_oss::u8string str::EncodedStringView::u8string() const { - return str::details::to_u8string(mString.data(), mString.size(), mIsUtf8); + return mIsUtf8 ? 
+ str::cast(mString.data()) : // copy + str::to_u8string(str::cast(mString.data()), mString.size()); } -std::string& str::EncodedStringView::toUtf8(std::string& result) const +std::string str::EncodedStringView::asUtf8() const { - return str::details::to_u8string(mString.data(), mString.size(), mIsUtf8, result); + const auto result = u8string(); + return str::c_str(result); // cast & copy } std::u16string str::EncodedStringView::u16string() const { - return str::details::to_u16string(mString.data(), mString.size(), mIsUtf8); + return ::to_u16string(mString.data(), mString.size(), mIsUtf8); } str::ui16string str::EncodedStringView::ui16string_() const { - return str::details::to_ui16string(mString.data(), mString.size(), mIsUtf8); + return ::to_ui16string(mString.data(), mString.size(), mIsUtf8); } +inline std::u32string to_u32string(std::string::const_pointer s, size_t sz, bool is_utf8 /* is 's' UTF-8? */) +{ + if (is_utf8) + { + return str::to_u32string(str::cast(s), sz); + } + return str::to_u32string(str::cast(s), sz); +} std::u32string str::EncodedStringView::u32string() const { - return str::details::to_u32string(mString.data(), mString.size(), mIsUtf8); + return ::to_u32string(mString.data(), mString.size(), mIsUtf8); } + std::wstring str::EncodedStringView::wstring() const // UTF-16 on Windows, UTF-32 on Linux { - return str::details::to_wstring(mString.data(), mString.size(), mIsUtf8); + const auto p = mString.data(); + const auto sz = mString.size(); + const auto s = + // Need to use #ifdef's because str::cast() checks to be sure the sizes are correct. + #if _WIN32 + ::to_u16string(p, sz, mIsUtf8); // std::wstring is UTF-16 on Windows + #endif + #if !_WIN32 + ::to_u32string(p, sz, mIsUtf8); // std::wstring is UTF-32 on Linux + #endif + return str::c_str(s); // copy } - str::W1252string str::EncodedStringView::w1252string() const { - return str::details::to_w1252string(mString.data(), mString.size(), mIsUtf8); + return mIsUtf8 ? 
+ str::to_w1252string(str::cast(mString.data()), mString.size()) : + str::cast(mString.data()); // copy } std::string str::EncodedStringView::asWindows1252() const { diff --git a/externals/coda-oss/modules/c++/str/source/Encoding.cpp b/externals/coda-oss/modules/c++/str/source/Encoding.cpp index 65c42a213..19cb02d9c 100644 --- a/externals/coda-oss/modules/c++/str/source/Encoding.cpp +++ b/externals/coda-oss/modules/c++/str/source/Encoding.cpp @@ -34,6 +34,7 @@ #include "str/Manip.h" #include "str/Convert.h" #include "str/utf8.h" +#include "str/EncodedStringView.h" // Need to look up characters from \x80 (EURO SIGN) to \x9F (LATIN CAPITAL LETTER Y WITH DIAERESIS) // in a map: http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT @@ -145,13 +146,41 @@ static void fromWindows1252_(str::W1252string::value_type ch, std::basic_string< append(result, replacement_character); } template -void windows1252_to_string(str::W1252string::const_pointer p, size_t sz, std::basic_string& result) +void windows1252_to_string_(str::W1252string::const_pointer p, size_t sz, std::basic_string& result) { for (size_t i = 0; i < sz; i++) { fromWindows1252_(p[i], result); } } +template +inline TReturn to_Tstring(str::W1252string::const_pointer p, size_t sz) +{ + TReturn retval; + windows1252_to_string_(p, sz, retval); + return retval; +} + +inline void windows1252_to_string(str::W1252string::const_pointer p, size_t sz, coda_oss::u8string& result) +{ + windows1252_to_string_(p, sz, result); +} +void str::details::w1252to8(str::W1252string::const_pointer p, size_t sz, std::string& result) +{ + result = to_Tstring(p, sz); +} +std::u16string str::to_u16string(str::W1252string::const_pointer p, size_t sz) +{ + return to_Tstring(p, sz); +} +str::ui16string str::to_ui16string(str::W1252string::const_pointer p, size_t sz) +{ + return to_Tstring(p, sz); +} +std::u32string str::to_u32string(str::W1252string::const_pointer p, size_t sz) +{ + return to_Tstring(p, sz); +} template 
std::map kv_to_vk(const std::map& kv) @@ -223,6 +252,16 @@ static void utf8to1252(coda_oss::u8string::const_pointer p, size_t sz, std::basi } } } +void str::details::utf8to1252(coda_oss::u8string::const_pointer p, size_t sz, std::string& result) +{ + ::utf8to1252(p, sz, result); +} +str::W1252string str::to_w1252string(coda_oss::u8string::const_pointer p, size_t sz) +{ + str::W1252string retval; + utf8to1252(p, sz, retval); + return retval; +} struct back_inserter final { @@ -260,10 +299,9 @@ coda_oss::u8string str::to_u8string(std::u16string::const_pointer p, size_t sz) utf8::utf8to16(begin, begin+result.size(), std::back_inserter(utf16line)); */ } -std::string& str::details::to_u8string(std::u16string::const_pointer p, size_t sz, std::string& result) +void str::details::utf16to8(std::u16string::const_pointer p, size_t sz, std::string& result) { utf8::utf16to8(p, p + sz, std::back_inserter(result)); - return result; } std::u16string str::to_u16string(coda_oss::u8string::const_pointer p_, size_t sz) @@ -273,6 +311,13 @@ std::u16string str::to_u16string(coda_oss::u8string::const_pointer p_, size_t sz utf8::utf8to16(p, p + sz, std::back_inserter(retval)); return retval; } +str::ui16string str::to_ui16string(coda_oss::u8string::const_pointer p_, size_t sz) +{ + auto p = str::cast(p_); + str::ui16string retval; + utf8::utf8to16(p, p + sz, std::back_inserter(retval)); + return retval; +} std::u32string str::to_u32string(coda_oss::u8string::const_pointer p_, size_t sz) { @@ -288,171 +333,16 @@ coda_oss::u8string str::to_u8string(std::u32string::const_pointer p, size_t sz) utf8::utf32to8(p, p + sz, back_inserter(retval)); return retval; } -std::string& str::details::to_u8string(std::u32string::const_pointer p, size_t sz, std::string& result) -{ - utf8::utf32to8(p, p + sz, std::back_inserter(result)); - return result; -} coda_oss::u8string str::to_u8string(W1252string::const_pointer p, size_t sz) { coda_oss::u8string retval; - windows1252_to_string(p, sz, retval); + 
::windows1252_to_string(p, sz, retval); return retval; } -std::string& str::details::to_u8string(std::string::const_pointer p, size_t sz, bool is_utf8 /* is 'p' UTF-8? */, std::string& result) -{ - if (is_utf8) - { - result = p; // copy - } - else - { - windows1252_to_string(cast(p), sz, result); - } - return result; -} - -coda_oss::u8string str::details::to_u8string(std::string::const_pointer p, size_t sz, bool is_utf8 /* is 'p' UTF-8? */) -{ - return is_utf8 ? - cast(p) : // copy - to_u8string(cast(p), sz); -} -coda_oss::u8string str::to_u8string(std::string::const_pointer p, size_t sz) -{ - auto platform = details::Platform; // "conditional expression is constant" - return details::to_u8string(p, sz, platform == details::PlatformType::Linux); // std::string is UTF-8 on Linux -} - -template -static inline TReturn to_16string(std::string::const_pointer s, size_t sz, bool is_utf8 /* is 's' UTF-8? */) -{ - TReturn retval; - if (is_utf8) - { - auto p_ = str::cast(s); - auto p = str::cast(p_); - utf8::utf8to16(p, p + sz, std::back_inserter(retval)); - } - else - { - windows1252_to_string(str::cast(s), sz, retval); - } - return retval; -} -std::u16string str::details::to_u16string(std::string::const_pointer s, size_t sz, bool is_utf8 /* is 's' UTF-8? */) -{ - return to_16string(s, sz, is_utf8); -} -str::ui16string str::details::to_ui16string(std::string::const_pointer s, size_t sz, bool is_utf8 /* is 's' UTF-8? */) -{ - return to_16string(s, sz, is_utf8); -} - -std::u32string str::details::to_u32string(std::string::const_pointer s, size_t sz, bool is_utf8 /* is 's' UTF-8? */) -{ - if (is_utf8) - { - return str::to_u32string(cast(s), sz); - } - - std::u32string retval; - windows1252_to_string(cast(s), sz, retval); - return retval; -} -std::wstring str::details::to_wstring(std::string::const_pointer p, size_t sz, bool is_utf8 /* is 's' UTF-8? */) -{ - const auto s = - // Need to use #ifdef's because str::cast() checks to be sure the sizes are correct. 
- #if _WIN32 - to_u16string(p, sz, is_utf8); // std::wstring is UTF-16 on Windows - #endif - #if !_WIN32 - to_u32string(p, sz, is_utf8); // std::wstring is UTF-32 on Linux - #endif - return str::c_str(s); // copy -} - -coda_oss::u8string str::to_u8string(std::wstring::const_pointer p_, size_t sz) // std::wstring is UTF-16 or UTF-32 depending on platform -{ - const auto p = - // Need to use #ifdef's because str::cast() checks to be sure the sizes are correct. - #if _WIN32 - str::cast(p_); // std::wstring is UTF-16 on Windows - #endif - #if !_WIN32 - str::cast(p_); // std::wstring is UTF-32 on Linux - #endif - return to_u8string(p, sz); -} - -str::W1252string str::details::to_w1252string(coda_oss::u8string::const_pointer p, size_t sz) -{ - str::W1252string retval; - utf8to1252(p, sz, retval); - return retval; -} - -str::W1252string str::details::to_w1252string(std::string::const_pointer p, size_t sz, bool is_utf8 /* is 's' UTF-8? */) -{ - return is_utf8 ? - to_w1252string(cast(p), sz) : - cast(p); // copy -} -str::W1252string str::details::to_w1252string(std::string::const_pointer p, size_t sz) -{ - auto platform = details::Platform; // "conditional expression is constant" - return to_w1252string(p, sz, platform == details::PlatformType::Linux); // std::string is UTF-8 on Linux -} - -std::string str::details::to_native(coda_oss::u8string::const_pointer p, size_t sz) -{ - auto platform = str::details::Platform; // "conditional expression is constant" - if (platform == str::details::PlatformType::Windows) - { - std::string retval; - utf8to1252(p, sz, retval); - return retval; - } - if (platform == str::details::PlatformType::Linux) - { - auto retval = cast(p); - return retval != nullptr ? 
retval /* copy */ : ""; - } - throw std::logic_error("Unknown platform."); -} - -std::string str::details::to_native(W1252string::const_pointer p, size_t sz) -{ - auto platform = details::Platform; // "conditional expression is constant" - if (platform == details::PlatformType::Windows) - { - auto retval = cast(p); - return retval != nullptr ? retval /* copy */ : ""; - } - if (platform == details::PlatformType::Linux) - { - std::string retval; - windows1252_to_string(p, sz, retval); - return retval; - } - throw std::logic_error("Unknown platform."); -} - -coda_oss::u8string str::fromWindows1252(std::string::const_pointer p, size_t sz) -{ - return to_u8string(cast(p), sz); -} - -coda_oss::u8string str::fromUtf8(std::string::const_pointer p, size_t sz) -{ - return to_u8string(cast(p), sz); -} - template <> std::string str::toString(const coda_oss::u8string& utf8) { - return str::details::to_native(utf8); + return str::EncodedStringView(utf8).native(); } diff --git a/externals/coda-oss/modules/c++/str/unittests/test_base_convert.cpp b/externals/coda-oss/modules/c++/str/unittests/test_base_convert.cpp index 9e8774f73..ce581c6c6 100644 --- a/externals/coda-oss/modules/c++/str/unittests/test_base_convert.cpp +++ b/externals/coda-oss/modules/c++/str/unittests/test_base_convert.cpp @@ -1,442 +1,434 @@ -/* ========================================================================= - * This file is part of str-c++ - * ========================================================================= - * - * (C) Copyright 2004 - 2014, MDA Information Systems LLC - * - * str-c++ is free software; you can redistribute it and/or modify - * it under the terms of the GNU Lesser General Public License as published by - * the Free Software Foundation; either version 3 of the License, or - * (at your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; If not, - * see . - * - */ - -#include - -#include -#include -#include -#include - -#include "coda_oss/CPlusPlus.h" - -#include -#include -#include - -#include "TestCase.h" - -// It seems that a macro is better than a utility routine, see https://github.com/tahonermann/char8_t-remediation -// C++20 changed the type of u8 to char8_t* https://en.cppreference.com/w/cpp/language/string_literal -// Not putting this everywhere because (1) well, it's a macro, and (2) it's mostly -// only test code that uses string literals. -#if CODA_OSS_cpp20 -#define U8(ch) u8##ch -#define U8s(s) u8##s -#else -#define U8(ch) static_cast(ch) -#define U8s(s) static_cast(static_cast(s)) -#endif - -static std::string to_string(const std::u8string& value) -{ - return str::c_str(value); // copy -} -static std::string to_string(const std::u32string& s) -{ - return to_string(str::to_u8string(s)); -} - -TEST_CASE(testConvert) -{ - TEST_ASSERT_EQ(str::toType("0x3BC7", 16), (long long) 0x3BC7); - TEST_ASSERT_EQ(str::toType("1101", 2), (long long) 13); - TEST_ASSERT_EQ(str::toType("231", 5), (long long) 66); - TEST_ASSERT_EQ(str::toType("0xFFFFFFFFFFFFFFFF", 16), - (unsigned long long) 0xFFFFFFFFFFFFFFFF); - TEST_ASSERT_EQ(str::toType("-10", 10), - (unsigned long long) -10); - TEST_ASSERT_EQ(str::toType("13", 4), (short) 7); -} - -TEST_CASE(testBadConvert) -{ - TEST_EXCEPTION(str::toType("Not a number", 10)); - TEST_EXCEPTION(str::toType("0xFFFFFFFFFFFFFFFF", 16)); - TEST_EXCEPTION(str::toType("0xFFFFF", 16)); -} - -TEST_CASE(testEightBitIntToString) -{ - TEST_ASSERT_EQ(str::toString(1), "1"); - TEST_ASSERT_EQ(str::toString(2), "2"); - 
TEST_ASSERT_EQ(str::toString(-2), "-2"); -} - -TEST_CASE(testCharToString) -{ - TEST_ASSERT_EQ(str::toString('a'), "a"); - TEST_ASSERT_EQ(str::toString(65), "A"); -} - -static inline std::u8string fromWindows1252(const std::string& s) -{ - // s is Windows-1252 on ALL platforms - return str::fromWindows1252(s.c_str(), s.size()); -} - -template -static inline constexpr std::u32string::value_type U(TChar ch) -{ - return static_cast(ch); -} - -TEST_CASE(test_string_to_u8string_ascii) -{ - { - const std::string input = "|\x00"; // ASCII, "|" - const auto actual = fromWindows1252(input); - const std::u8string expected{U8('|')}; // '\x00' is the end of the string in C/C++ - TEST_ASSERT_EQ(actual, expected); - } - constexpr uint8_t start_of_heading = 0x01; - constexpr uint8_t delete_character = 0x7f; - for (uint8_t ch = start_of_heading; ch <= delete_character; ch++) // ASCII - { - const std::string input { '|', static_cast(ch), '|'}; - const auto actual = fromWindows1252(input); - const std::u8string expected8{U8('|'), U8(ch), U8('|')}; - TEST_ASSERT_EQ(actual, expected8); - const std::u32string expected{U'|', U(ch), U'|'}; - TEST_ASSERT_EQ(to_string(actual), to_string(expected)); - } -} - -TEST_CASE(test_string_to_u8string_windows_1252) -{ - // Windows-1252 only characters must be mapped to UTF-8 - { - const std::string input = "|\x80|"; // Windows-1252, "|�|" - const auto actual = fromWindows1252(input); - const std::u8string expected8{U8s("|\xE2\x82\xAC|")}; // UTF-8, "|�|" - TEST_ASSERT_EQ(actual, expected8); - const std::u32string expected{U'|', 0x20AC, U'|'}; // UTF-32, "|�|" - TEST_ASSERT_EQ(to_string(actual), to_string(expected)); - } - { - const std::string input = "|\x9F|"; // Windows-1252, "|�|" - const auto actual = fromWindows1252(input); - const std::u8string expected8{U8s("|\xC5\xB8|")}; // UTF-8, "|�|" - TEST_ASSERT_EQ(actual, expected8); - const std::u32string expected{U'|', 0x0178, U'|'}; // UTF-32, "|�|" - TEST_ASSERT_EQ(to_string(actual), 
to_string(expected)); - } - { - const std::vector undefined{ '\x81', '\x8d', '\x8f', '\x90', '\x9d' }; - for (const auto& ch : undefined) - { - const std::string input{'|', ch, '|'}; - const auto actual = fromWindows1252(input); - static const std::u8string expected8{U8s("|\xEF\xBF\xBD|")}; // UTF-8, "||" - TEST_ASSERT_EQ(actual, expected8); - const std::u32string expected{U'|', 0xfffd, U'|'}; // UTF-32, "||" - TEST_ASSERT_EQ(to_string(actual), to_string(expected)); - } - } - { - // http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT - const std::vector windows1252_characters{ - //0x80, // EURO SIGN - //0x82, // SINGLE LOW-9 QUOTATION MARK - //0x83, // LATIN SMALL LETTER F WITH HOOK - //0x84, // DOUBLE LOW-9 QUOTATION MARK - //0x85, // HORIZONTAL ELLIPSIS - //0x86, // DAGGER - //0x87, // DOUBLE DAGGER - //0x88, // MODIFIER LETTER CIRCUMFLEX ACCENT - //0x89, // PER MILLE SIGN - //0x8A, // LATIN CAPITAL LETTER S WITH CARON - //0x8B, // SINGLE LEFT-POINTING ANGLE QUOTATION MARK - //0x8C, // LATIN CAPITAL LIGATURE OE - 0x8E, // LATIN CAPITAL LETTER Z WITH CARON - 0x91, // LEFT SINGLE QUOTATION MARK - 0x92, // RIGHT SINGLE QUOTATION MARK - 0x93, // LEFT DOUBLE QUOTATION MARK - 0x94, // RIGHT DOUBLE QUOTATION MARK - 0x95, // BULLET - 0x96, // EN DASH - 0x97, // EM DASH - 0x98, // SMALL TILDE - 0x99, // TRADE MARK SIGN - 0x9A, // LATIN SMALL LETTER S WITH CARON - 0x9B, // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK - 0x9C, // LATIN SMALL LIGATURE OE - 0x9E, // LATIN SMALL LETTER Z WITH CARON - 0x9F}; // LATIN CAPITAL LETTER Y WITH DIAERESIS - for (const auto& ch : windows1252_characters) - { - const std::string input_ { '|', static_cast(ch), '|'}; - const str::W1252string input(str::c_str(input_)); - const auto actual = to_u8string(input); - - // No "expected" to test against as the UTF-8 values for these Windows-1252 characters - // are mapped one-by-one. 
However, we can test that UTF-8 to Windows-1252 - // works as that walks through a UTF-8 string which can have 1-, 2-, 3- and 4-bytes - // for a single code-point. - const str::W1252string w1252 = str::details::to_w1252string(actual.data(), actual.size()); - TEST_ASSERT(input == w1252); - - // Can't compare the values with == because TEST_ASSERT_EQ() - // wants to do toString() and that doesn't work on Linux as the encoding - // is wrong (see above). - //const std::string w1252_ = str::c_str(w1252); - //TEST_ASSERT_EQ(input_, w1252_); - const str::EncodedStringView inputView(input); - const str::EncodedStringView w1252View(w1252); - TEST_ASSERT_EQ(inputView, w1252View); - } - } -} - -TEST_CASE(test_string_to_u8string_iso8859_1) -{ - constexpr uint8_t nobreak_space = 0xa0; - constexpr uint8_t latin_small_letter_y_with_diaeresis = 0xff; // '�' - for (uint32_t ch = nobreak_space; ch <= latin_small_letter_y_with_diaeresis; ch++) // ISO8859-1 - { - const std::string input_ { '|', static_cast(ch), '|'}; - const str::W1252string input(str::c_str(input_)); - const auto actual = to_u8string(input); - const std::u32string expected{U'|', U(ch), U'|'}; - TEST_ASSERT_EQ(to_string(actual), to_string(expected)); - - // Can't compare the values with == because TEST_ASSERT_EQ() - // wants to do toString() and that doesn't work on Linux as the encoding - // is wrong (see above). 
- //std::string actual_; - //str::details::toString(actual.c_str(), actual_); - //TEST_ASSERT_EQ(input_, actual_); - const str::EncodedStringView inputView(input); - const str::EncodedStringView actualView(actual); - TEST_ASSERT_EQ(inputView, actualView); - } -} - -template -static void test_change_case_(const std::string& testName, const TString& lower, const TString& upper) -{ - auto s = upper; - str::lower(s); - TEST_ASSERT(s == lower); - s = lower; - str::upper(s); - TEST_ASSERT(s == upper); - - s = upper; - str::upper(s); - TEST_ASSERT(s == upper); - s = lower; - str::lower(s); - TEST_ASSERT(s == lower); -} -TEST_CASE(test_change_case) -{ - const std::string ABC = "ABC"; - const std::string abc = "abc"; - test_change_case_(testName, abc, ABC); - - //const std::wstring ABC_w = L"ABC"; - //const std::wstring abc_w = L"abc"; - //test_change_case_(testName, abc_w, ABC_w); - - //// Yes, this can really come up, "non classifi�" is French (Canadian) for "unclassified". - //const std::string DEF_1252{'D', '\xc9', 'F'}; // "D�F" Windows-1252 - //const auto DEF8 = fromWindows1252(DEF_1252); - - //const std::string def_1252{'d', '\xe9', 'f'}; // "d�f" Windows-1252 - //const auto def8 = fromWindows1252(def_1252); - - ////test_change_case_(testName, def, DEF); - //test_change_case_(testName, def_1252, DEF_1252); -} - -// https://en.wikipedia.org/wiki/%C3%89#Character_mappings -static const str::EncodedString& classificationText_utf_8() -{ - static const str::EncodedString retval(str::cast("A\xc3\x89IOU")); // UTF-8 "A�IOU" - return retval; - } -static const str::EncodedString& classificationText_iso8859_1() -{ - static const str::EncodedString retval(str::cast("A\xc9IOU")); // ISO8859-1 "A�IOU" - return retval; - } -// UTF-16 on Windows, UTF-32 on Linux -static const wchar_t* classificationText_wide_() { return L"A\xc9IOU"; } // UTF-8 "A�IOU" -static str::EncodedString classificationText_wide() { return str::EncodedString(classificationText_wide_()); } -static std::string 
classificationText_platform() { return - sys::Platform == sys::PlatformType::Linux ? classificationText_utf_8().native() : classificationText_iso8859_1().native(); } - -TEST_CASE(test_u8string_to_string) -{ - { - const auto utf8 = classificationText_utf_8().u8string(); - const str::EncodedStringView utf8View(utf8); - const auto actual = utf8View.native(); - TEST_ASSERT_EQ(classificationText_platform(), actual); - } - { - const auto utf8 = classificationText_iso8859_1().u8string(); - const str::EncodedStringView utf8View(utf8); - const auto actual = utf8View.native(); - TEST_ASSERT_EQ(classificationText_platform(), actual); - } -} - -TEST_CASE(test_u8string_to_u16string) -{ - #if _WIN32 - const auto actual = classificationText_utf_8().u16string(); - const std::wstring s = str::c_str(actual); // Windows: std::wstring == std::u16string - TEST_ASSERT(classificationText_wide_() == s); // _EQ wants to do toString() - #endif - - TEST_ASSERT_EQ(classificationText_wide(), classificationText_utf_8()); - TEST_ASSERT_EQ(classificationText_wide(), classificationText_iso8859_1()); - - TEST_ASSERT(classificationText_wide().u16string() == classificationText_utf_8().u16string()); // _EQ wants to do toString() - TEST_ASSERT(classificationText_wide().u16string() == classificationText_iso8859_1().u16string()); // _EQ wants to do toString() -} - -TEST_CASE(test_u8string_to_u32string) -{ - #if !_WIN32 - const auto actual = classificationText_utf_8().u32string(); - const std::wstring s = str::c_str(actual); // Linux: std::wstring == std::u32string - TEST_ASSERT(classificationText_wide_() == s); // _EQ wants to do toString() - #endif - - TEST_ASSERT_EQ(classificationText_wide(), classificationText_utf_8()); - TEST_ASSERT_EQ(classificationText_wide(), classificationText_iso8859_1()); - - TEST_ASSERT(classificationText_wide().u32string() == classificationText_utf_8().u32string()); // _EQ wants to do toString() - TEST_ASSERT(classificationText_wide().u32string() == 
classificationText_iso8859_1().u32string()); // _EQ wants to do toString() -} - -static void test_EncodedStringView_(const std::string& testName, - const str::EncodedStringView& utf_8_view, const str::EncodedStringView& iso8859_1_view) -{ - (void)testName; - TEST_ASSERT_EQ(iso8859_1_view, iso8859_1_view); - TEST_ASSERT_EQ(utf_8_view, utf_8_view); - TEST_ASSERT_EQ(iso8859_1_view, utf_8_view); - TEST_ASSERT_EQ(utf_8_view, iso8859_1_view); - - TEST_ASSERT_EQ(iso8859_1_view.native(), utf_8_view.native()); - const auto native = classificationText_platform(); - TEST_ASSERT_EQ(iso8859_1_view.native(), native); - TEST_ASSERT_EQ(utf_8_view.native(), native); - - TEST_ASSERT(utf_8_view == classificationText_utf_8()); - TEST_ASSERT_EQ(utf_8_view, classificationText_utf_8()); - TEST_ASSERT(iso8859_1_view == classificationText_utf_8()); - TEST_ASSERT_EQ(iso8859_1_view, classificationText_utf_8()); - TEST_ASSERT(iso8859_1_view.u8string() == utf_8_view.u8string()); - - const auto expected = str::EncodedString::details::string(classificationText_utf_8()); - { - std::string buf; - const auto& actual = utf_8_view.toUtf8(buf); - TEST_ASSERT_EQ(actual, expected); - } - { - std::string buf; - const auto& actual = iso8859_1_view.toUtf8(buf); - TEST_ASSERT_EQ(actual, expected); - } -} -TEST_CASE(test_EncodedStringView) -{ - str::EncodedStringView esv; - auto copy(esv); - copy = esv; // assignment - - { - auto utf_8_view(classificationText_utf_8().view()); - auto iso8859_1_view(classificationText_iso8859_1().view()); - test_EncodedStringView_(testName, utf_8_view, iso8859_1_view); - - utf_8_view = classificationText_iso8859_1().view(); - iso8859_1_view = classificationText_utf_8().view(); - test_EncodedStringView_(testName, utf_8_view, iso8859_1_view); - } - { - auto utf_8_view = classificationText_utf_8().view(); - auto iso8859_1_view = classificationText_iso8859_1().view(); - test_EncodedStringView_(testName, utf_8_view, iso8859_1_view); - - utf_8_view = 
classificationText_iso8859_1().view(); - iso8859_1_view = classificationText_utf_8().view(); - test_EncodedStringView_(testName, utf_8_view, iso8859_1_view); - } - { - str::EncodedStringView utf_8_view; - utf_8_view = classificationText_iso8859_1().view(); - str::EncodedStringView iso8859_1_view; - iso8859_1_view = classificationText_utf_8().view(); - test_EncodedStringView_(testName, utf_8_view, iso8859_1_view); - } -} - -TEST_CASE(test_EncodedString) -{ - str::EncodedString es; - TEST_ASSERT_TRUE(es.native().empty()); - { - str::EncodedString es_copy(es); // copy - TEST_ASSERT_TRUE(es_copy.native().empty()); - } - es = str::EncodedString("abc"); // assignment - TEST_ASSERT_EQ(es.native(), "abc"); - { - str::EncodedString es_copy(es); // copy, again; this time w/o default content - TEST_ASSERT_EQ(es_copy.native(), "abc"); - } - - str::EncodedString abc(es); // copy, for use below - TEST_ASSERT_EQ(abc.native(), "abc"); - - str::EncodedString es2; - es = std::move(es2); // move assignment - TEST_ASSERT_TRUE(es.native().empty()); - str::EncodedString abc_(abc); // copy - es = std::move(abc_); // move assignment, w/o default content - TEST_ASSERT_EQ(es.native(), "abc"); - - str::EncodedString es3(std::move(abc)); // move constructor - TEST_ASSERT_EQ(es3.native(), "abc"); -} -TEST_MAIN( - TEST_CHECK(testConvert); - TEST_CHECK(testBadConvert); - TEST_CHECK(testEightBitIntToString); - TEST_CHECK(testCharToString); - TEST_CHECK(test_string_to_u8string_ascii); - TEST_CHECK(test_string_to_u8string_windows_1252); - TEST_CHECK(test_string_to_u8string_iso8859_1); - TEST_CHECK(test_change_case); - TEST_CHECK(test_u8string_to_string); - TEST_CHECK(test_u8string_to_u16string); - TEST_CHECK(test_u8string_to_u32string); - TEST_CHECK(test_EncodedStringView); - TEST_CHECK(test_EncodedString); +/* ========================================================================= + * This file is part of str-c++ + * ========================================================================= + * + 
* (C) Copyright 2004 - 2014, MDA Information Systems LLC + * + * str-c++ is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; If not, + * see . + * + */ + +#include + +#include +#include +#include + +#include +#include +#include + +#include "TestCase.h" + +inline static void test_assert_eq(const std::string& testName, const std::u8string& actual, const std::u8string& expected) +{ + TEST_ASSERT_EQ(actual, expected); +} +inline static void test_assert_eq(const std::string& testName, const std::u8string& actual, const std::u32string& expected) +{ + test_assert_eq(testName, actual, str::to_u8string(expected)); +} + +TEST_CASE(testConvert) +{ + TEST_ASSERT_EQ(str::toType("0x3BC7", 16), (long long) 0x3BC7); + TEST_ASSERT_EQ(str::toType("1101", 2), (long long) 13); + TEST_ASSERT_EQ(str::toType("231", 5), (long long) 66); + TEST_ASSERT_EQ(str::toType("0xFFFFFFFFFFFFFFFF", 16), + (unsigned long long) 0xFFFFFFFFFFFFFFFF); + TEST_ASSERT_EQ(str::toType("-10", 10), + (unsigned long long) -10); + TEST_ASSERT_EQ(str::toType("13", 4), (short) 7); +} + +TEST_CASE(testBadConvert) +{ + TEST_EXCEPTION(str::toType("Not a number", 10)); + TEST_EXCEPTION(str::toType("0xFFFFFFFFFFFFFFFF", 16)); + TEST_EXCEPTION(str::toType("0xFFFFF", 16)); +} + +TEST_CASE(testEightBitIntToString) +{ + TEST_ASSERT_EQ(str::toString(1), "1"); + TEST_ASSERT_EQ(str::toString(2), "2"); + TEST_ASSERT_EQ(str::toString(-2), "-2"); +} + 
+TEST_CASE(testCharToString) +{ + TEST_ASSERT_EQ(str::toString('a'), "a"); + TEST_ASSERT_EQ(str::toString(65), "A"); +} + +static std::u8string fromWindows1252(const std::string& s) +{ + // s is Windows-1252 on ALL platforms + return str::EncodedStringView::fromWindows1252(s).u8string(); +} + +template +static constexpr std::u8string::value_type cast8(T ch) +{ + static_assert(sizeof(std::u8string::value_type) == sizeof(char), "sizeof(Char8_T) != sizeof(char)"); + return static_cast(ch); +} +template +static inline constexpr std::u32string::value_type U(TChar ch) +{ + return static_cast(ch); +} + +TEST_CASE(test_string_to_u8string_ascii) +{ + { + const std::string input = "|\x00"; // ASCII, "|" + const auto actual = fromWindows1252(input); + const std::u8string expected{cast8('|')}; // '\x00' is the end of the string in C/C++ + TEST_ASSERT_EQ(actual, expected); + } + constexpr uint8_t start_of_heading = 0x01; + constexpr uint8_t delete_character = 0x7f; + for (uint8_t ch = start_of_heading; ch <= delete_character; ch++) // ASCII + { + const std::string input { '|', static_cast(ch), '|'}; + const auto actual = fromWindows1252(input); + const std::u8string expected8{cast8('|'), cast8(ch), cast8('|')}; + test_assert_eq(testName, actual, expected8); + const std::u32string expected{U'|', U(ch), U'|'}; + test_assert_eq(testName, actual, expected); + } +} + +TEST_CASE(test_string_to_u8string_windows_1252) +{ + // Windows-1252 only characters must be mapped to UTF-8 + { + const std::string input = "|\x80|"; // Windows-1252, "|�|" + const auto actual = fromWindows1252(input); + const std::u8string expected8{cast8('|'), cast8('\xE2'), cast8('\x82'), cast8('\xAC'), cast8('|')}; // UTF-8, "|�|" + test_assert_eq(testName, actual, expected8); + const std::u32string expected{U"|\U000020AC|"}; // UTF-32, "|�|" + test_assert_eq(testName, actual, expected); + } + { + const std::string input = "|\x9F|"; // Windows-1252, "|�|" + const auto actual = fromWindows1252(input); + const 
std::u8string expected8{cast8('|'), cast8('\xC5'), cast8('\xB8'), cast8('|')}; // UTF-8, "|�|" + test_assert_eq(testName, actual, expected8); + const std::u32string expected{U"|\U00000178|"}; // UTF-32, "|�|" + test_assert_eq(testName, actual, expected); + } + { + const std::vector undefined{ '\x81', '\x8d', '\x8f', '\x90', '\x9d' }; + for (const auto& ch : undefined) + { + const std::string input{'|', ch, '|'}; + const auto actual = fromWindows1252(input); + static const std::u8string expected8{cast8('|'), cast8('\xEF'), cast8('\xBF'), cast8('\xBD'), cast8('|')}; // UTF-8, "||" + test_assert_eq(testName, actual, expected8); + const std::u32string expected{U"|\U0000fffd|"}; // UTF-32, "||" + test_assert_eq(testName, actual, expected); + } + } + { + // http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT + const std::vector windows1252_characters{ + //0x80, // EURO SIGN + //0x82, // SINGLE LOW-9 QUOTATION MARK + //0x83, // LATIN SMALL LETTER F WITH HOOK + //0x84, // DOUBLE LOW-9 QUOTATION MARK + //0x85, // HORIZONTAL ELLIPSIS + //0x86, // DAGGER + //0x87, // DOUBLE DAGGER + //0x88, // MODIFIER LETTER CIRCUMFLEX ACCENT + //0x89, // PER MILLE SIGN + //0x8A, // LATIN CAPITAL LETTER S WITH CARON + //0x8B, // SINGLE LEFT-POINTING ANGLE QUOTATION MARK + //0x8C, // LATIN CAPITAL LIGATURE OE + 0x8E, // LATIN CAPITAL LETTER Z WITH CARON + 0x91, // LEFT SINGLE QUOTATION MARK + 0x92, // RIGHT SINGLE QUOTATION MARK + 0x93, // LEFT DOUBLE QUOTATION MARK + 0x94, // RIGHT DOUBLE QUOTATION MARK + 0x95, // BULLET + 0x96, // EN DASH + 0x97, // EM DASH + 0x98, // SMALL TILDE + 0x99, // TRADE MARK SIGN + 0x9A, // LATIN SMALL LETTER S WITH CARON + 0x9B, // SINGLE RIGHT-POINTING ANGLE QUOTATION MARK + 0x9C, // LATIN SMALL LIGATURE OE + 0x9E, // LATIN SMALL LETTER Z WITH CARON + 0x9F}; // LATIN CAPITAL LETTER Y WITH DIAERESIS + for (const auto& ch : windows1252_characters) + { + const std::string input_ { '|', static_cast(ch), '|'}; + const str::W1252string 
input(str::c_str(input_)); + const auto actual = to_u8string(input); + + // No "expected" to test against as the UTF-8 values for these Windows-1252 characters + // are mapped one-by-one. However, we can test that UTF-8 to Windows-1252 + // works as that walks through a UTF-8 string which can have 1-, 2-, 3- and 4-bytes + // for a single code-point. + const auto w1252 = str::EncodedStringView::details::w1252string(str::EncodedStringView(actual)); + TEST_ASSERT(input == w1252); + + // Can't compare the values with == because TEST_ASSERT_EQ() + // wants to do toString() and that doesn't work on Linux as the encoding + // is wrong (see above). + //const std::string w1252_ = str::c_str(w1252); + //TEST_ASSERT_EQ(input_, w1252_); + const str::EncodedStringView inputView(input); + const str::EncodedStringView w1252View(w1252); + TEST_ASSERT_EQ(inputView, w1252View); + } + } +} + +TEST_CASE(test_string_to_u8string_iso8859_1) +{ + constexpr uint8_t nobreak_space = 0xa0; + constexpr uint8_t latin_small_letter_y_with_diaeresis = 0xff; // '�' + for (uint32_t ch = nobreak_space; ch <= latin_small_letter_y_with_diaeresis; ch++) // ISO8859-1 + { + const std::string input_ { '|', static_cast(ch), '|'}; + const str::W1252string input(str::c_str(input_)); + const auto actual = to_u8string(input); + const std::u32string expected{U'|', U(ch), U'|'}; + test_assert_eq(testName, actual, expected); + + // Can't compare the values with == because TEST_ASSERT_EQ() + // wants to do toString() and that doesn't work on Linux as the encoding + // is wrong (see above). 
+ //std::string actual_; + //str::details::toString(actual.c_str(), actual_); + //TEST_ASSERT_EQ(input_, actual_); + const str::EncodedStringView inputView(input); + const str::EncodedStringView actualView(actual); + TEST_ASSERT_EQ(inputView, actualView); + } +} + +template +static void test_change_case_(const std::string& testName, const TString& lower, const TString& upper) +{ + auto s = upper; + str::lower(s); + TEST_ASSERT(s == lower); + s = lower; + str::upper(s); + TEST_ASSERT(s == upper); + + s = upper; + str::upper(s); + TEST_ASSERT(s == upper); + s = lower; + str::lower(s); + TEST_ASSERT(s == lower); +} +TEST_CASE(test_change_case) +{ + const std::string ABC = "ABC"; + const std::string abc = "abc"; + test_change_case_(testName, abc, ABC); + + //const std::wstring ABC_w = L"ABC"; + //const std::wstring abc_w = L"abc"; + //test_change_case_(testName, abc_w, ABC_w); + + //// Yes, this can really come up, "non classifi�" is French (Canadian) for "unclassified". + //const std::string DEF_1252{'D', '\xc9', 'F'}; // "D�F" Windows-1252 + //const auto DEF8 = fromWindows1252(DEF_1252); + + //const std::string def_1252{'d', '\xe9', 'f'}; // "d�f" Windows-1252 + //const auto def8 = fromWindows1252(def_1252); + + ////test_change_case_(testName, def, DEF); + //test_change_case_(testName, def_1252, DEF_1252); +} + +// https://en.wikipedia.org/wiki/%C3%89#Character_mappings +static const str::EncodedString& classificationText_utf_8() +{ + static const str::EncodedString retval(str::cast("A\xc3\x89IOU")); // UTF-8 "A�IOU" + return retval; + } +static const str::EncodedString& classificationText_iso8859_1() +{ + static const str::EncodedString retval(str::cast("A\xc9IOU")); // ISO8859-1 "A�IOU" + return retval; + } +// UTF-16 on Windows, UTF-32 on Linux +static const wchar_t* classificationText_wide_() { return L"A\xc9IOU"; } // UTF-8 "A�IOU" +static str::EncodedString classificationText_wide() { return str::EncodedString(classificationText_wide_()); } +static std::string 
classificationText_platform() { return + sys::Platform == sys::PlatformType::Linux ? classificationText_utf_8().native() : classificationText_iso8859_1().native(); } + +TEST_CASE(test_u8string_to_string) +{ + { + const auto utf8 = classificationText_utf_8().u8string(); + const str::EncodedStringView utf8View(utf8); + const auto actual = utf8View.native(); + TEST_ASSERT_EQ(classificationText_platform(), actual); + } + { + const auto utf8 = classificationText_iso8859_1().u8string(); + const str::EncodedStringView utf8View(utf8); + const auto actual = utf8View.native(); + TEST_ASSERT_EQ(classificationText_platform(), actual); + } +} + +TEST_CASE(test_u8string_to_u16string) +{ + #if _WIN32 + const auto actual = classificationText_utf_8().u16string(); + const std::wstring s = str::c_str(actual); // Windows: std::wstring == std::u16string + TEST_ASSERT(classificationText_wide_() == s); // _EQ wants to do toString() + #endif + + TEST_ASSERT_EQ(classificationText_wide(), classificationText_utf_8()); + TEST_ASSERT_EQ(classificationText_wide(), classificationText_iso8859_1()); + + TEST_ASSERT(classificationText_wide().u16string() == classificationText_utf_8().u16string()); // _EQ wants to do toString() + TEST_ASSERT(classificationText_wide().u16string() == classificationText_iso8859_1().u16string()); // _EQ wants to do toString() +} + +TEST_CASE(test_u8string_to_u32string) +{ + #if !_WIN32 + const auto actual = classificationText_utf_8().u32string(); + const std::wstring s = str::c_str(actual); // Linux: std::wstring == std::u32string + TEST_ASSERT(classificationText_wide_() == s); // _EQ wants to do toString() + #endif + + TEST_ASSERT_EQ(classificationText_wide(), classificationText_utf_8()); + TEST_ASSERT_EQ(classificationText_wide(), classificationText_iso8859_1()); + + TEST_ASSERT(classificationText_wide().u32string() == classificationText_utf_8().u32string()); // _EQ wants to do toString() + TEST_ASSERT(classificationText_wide().u32string() == 
classificationText_iso8859_1().u32string()); // _EQ wants to do toString() +} + +static void test_EncodedStringView_(const std::string& testName, + const str::EncodedStringView& utf_8_view, const str::EncodedStringView& iso8859_1_view) +{ + (void)testName; + TEST_ASSERT_EQ(iso8859_1_view, iso8859_1_view); + TEST_ASSERT_EQ(utf_8_view, utf_8_view); + TEST_ASSERT_EQ(iso8859_1_view, utf_8_view); + TEST_ASSERT_EQ(utf_8_view, iso8859_1_view); + + TEST_ASSERT_EQ(iso8859_1_view.native(), utf_8_view.native()); + const auto native = classificationText_platform(); + TEST_ASSERT_EQ(iso8859_1_view.native(), native); + TEST_ASSERT_EQ(utf_8_view.native(), native); + + TEST_ASSERT(utf_8_view == classificationText_utf_8()); + TEST_ASSERT_EQ(utf_8_view, classificationText_utf_8()); + TEST_ASSERT(iso8859_1_view == classificationText_utf_8()); + TEST_ASSERT_EQ(iso8859_1_view, classificationText_utf_8()); + TEST_ASSERT(iso8859_1_view.u8string() == utf_8_view.u8string()); + + const auto expected = str::EncodedString::details::string(classificationText_utf_8()); + { + const auto actual = utf_8_view.asUtf8(); + TEST_ASSERT_EQ(actual, expected); + } + { + const auto actual = iso8859_1_view.asUtf8(); + TEST_ASSERT_EQ(actual, expected); + } +} +TEST_CASE(test_EncodedStringView) +{ + str::EncodedStringView esv; + auto copy(esv); + copy = esv; // assignment + + { + auto utf_8_view(classificationText_utf_8().view()); + auto iso8859_1_view(classificationText_iso8859_1().view()); + test_EncodedStringView_(testName, utf_8_view, iso8859_1_view); + + utf_8_view = classificationText_iso8859_1().view(); + iso8859_1_view = classificationText_utf_8().view(); + test_EncodedStringView_(testName, utf_8_view, iso8859_1_view); + } + { + auto utf_8_view = classificationText_utf_8().view(); + auto iso8859_1_view = classificationText_iso8859_1().view(); + test_EncodedStringView_(testName, utf_8_view, iso8859_1_view); + + utf_8_view = classificationText_iso8859_1().view(); + iso8859_1_view = 
classificationText_utf_8().view(); + test_EncodedStringView_(testName, utf_8_view, iso8859_1_view); + } + { + str::EncodedStringView utf_8_view; + utf_8_view = classificationText_iso8859_1().view(); + str::EncodedStringView iso8859_1_view; + iso8859_1_view = classificationText_utf_8().view(); + test_EncodedStringView_(testName, utf_8_view, iso8859_1_view); + } +} + +TEST_CASE(test_EncodedString) +{ + str::EncodedString es; + TEST_ASSERT_TRUE(es.empty()); + TEST_ASSERT_TRUE(es.native().empty()); + { + str::EncodedString es_copy(es); // copy + TEST_ASSERT_TRUE(es_copy.empty()); + TEST_ASSERT_TRUE(es_copy.native().empty()); + } + es = str::EncodedString("abc"); // assignment + TEST_ASSERT_EQ(es.native(), "abc"); + { + str::EncodedString es_copy(es); // copy, again; this time w/o default content + TEST_ASSERT_EQ(es_copy.native(), "abc"); + } + + str::EncodedString abc(es); // copy, for use below + TEST_ASSERT_EQ(abc.native(), "abc"); + + str::EncodedString es2; + es = std::move(es2); // move assignment + TEST_ASSERT_TRUE(es.empty()); + TEST_ASSERT_TRUE(es.native().empty()); + str::EncodedString abc_(abc); // copy + es = std::move(abc_); // move assignment, w/o default content + TEST_ASSERT_EQ(es.native(), "abc"); + + str::EncodedString es3(std::move(abc)); // move constructor + TEST_ASSERT_EQ(es3.native(), "abc"); +} +TEST_MAIN( + TEST_CHECK(testConvert); + TEST_CHECK(testBadConvert); + TEST_CHECK(testEightBitIntToString); + TEST_CHECK(testCharToString); + TEST_CHECK(test_string_to_u8string_ascii); + TEST_CHECK(test_string_to_u8string_windows_1252); + TEST_CHECK(test_string_to_u8string_iso8859_1); + TEST_CHECK(test_change_case); + TEST_CHECK(test_u8string_to_string); + TEST_CHECK(test_u8string_to_u16string); + TEST_CHECK(test_u8string_to_u32string); + TEST_CHECK(test_EncodedStringView); + TEST_CHECK(test_EncodedString); ) \ No newline at end of file diff --git a/externals/coda-oss/modules/c++/sys/unittests/test_byte_swap.cpp 
b/externals/coda-oss/modules/c++/sys/unittests/test_byte_swap.cpp index 095bc40e3..fd2ee6f5e 100644 --- a/externals/coda-oss/modules/c++/sys/unittests/test_byte_swap.cpp +++ b/externals/coda-oss/modules/c++/sys/unittests/test_byte_swap.cpp @@ -31,10 +31,10 @@ TEST_CASE(testEndianness) { - /*const*/ auto native = coda_oss::endian::native; // "const" causes "conditional expression is constant." + /*const*/ auto native = std::endian::native; // "const" causes "conditional expression is constant." - if (native == coda_oss::endian::big) { } - else if (native == coda_oss::endian::little) { } + if (native == std::endian::big) { } + else if (native == std::endian::little) { } else { TEST_FAIL("Mixed-endian not supported!"); @@ -42,7 +42,7 @@ TEST_CASE(testEndianness) const bool isBigEndianSystem = sys::isBigEndianSystem(); - if (native == coda_oss::endian::big) + if (native == std::endian::big) { TEST_ASSERT(isBigEndianSystem); } @@ -50,7 +50,7 @@ TEST_CASE(testEndianness) { TEST_ASSERT(!isBigEndianSystem); } - if (native == coda_oss::endian::little) + if (native == std::endian::little) { TEST_ASSERT(!isBigEndianSystem); } @@ -62,11 +62,11 @@ TEST_CASE(testEndianness) if (isBigEndianSystem) { - TEST_ASSERT(native == coda_oss::endian::big); + TEST_ASSERT(native == std::endian::big); } else { - TEST_ASSERT(native == coda_oss::endian::little); + TEST_ASSERT(native == std::endian::little); } } diff --git a/externals/coda-oss/modules/c++/xml.lite/source/Element.cpp b/externals/coda-oss/modules/c++/xml.lite/source/Element.cpp index 2339b516a..5904d86c4 100644 --- a/externals/coda-oss/modules/c++/xml.lite/source/Element.cpp +++ b/externals/coda-oss/modules/c++/xml.lite/source/Element.cpp @@ -52,7 +52,7 @@ std::unique_ptr xml::lite::Element::create(const QName& qnam } std::unique_ptr xml::lite::Element::createU8(const QName& qname, const std::string& characterData) { - return create(qname, str::to_u8string(characterData)); + return create(qname, 
str::EncodedStringView(characterData).u8string()); } xml::lite::Element::Element(const xml::lite::Element& node) diff --git a/externals/coda-oss/modules/c++/xml.lite/source/MinidomHandler.cpp b/externals/coda-oss/modules/c++/xml.lite/source/MinidomHandler.cpp index 17acfd1e4..347de87ce 100644 --- a/externals/coda-oss/modules/c++/xml.lite/source/MinidomHandler.cpp +++ b/externals/coda-oss/modules/c++/xml.lite/source/MinidomHandler.cpp @@ -126,7 +126,7 @@ bool xml::lite::MinidomHandler::vcharacters(const void /*XMLCh*/* chars_, size_t auto platformEncoding = xml::lite::PlatformEncoding; // "conditional expression is constant" if (platformEncoding == xml::lite::StringEncoding::Utf8) { - str::details::to_u8string(pChars16, length, chars); + str::details::utf16to8(pChars16, length, chars); } else if (platformEncoding == xml::lite::StringEncoding::Windows1252) { diff --git a/externals/coda-oss/modules/c++/xml.lite/unittests/test_xmlparser.cpp b/externals/coda-oss/modules/c++/xml.lite/unittests/test_xmlparser.cpp index b1ac11ad3..87226c5ad 100644 --- a/externals/coda-oss/modules/c++/xml.lite/unittests/test_xmlparser.cpp +++ b/externals/coda-oss/modules/c++/xml.lite/unittests/test_xmlparser.cpp @@ -34,6 +34,11 @@ #include "xml/lite/MinidomParser.h" #include "xml/lite/Validator.h" +static inline std::u8string fromUtf8(const std::string& utf8) +{ + return str::EncodedStringView::fromUtf8(utf8).u8string(); +} + static const std::string text("TEXT"); static const std::string strXml = "" + text + ""; @@ -45,7 +50,7 @@ static const auto pIso88591Text_ = str::c_str(iso88591Text1252); static const auto utf8Text8 = utf8Text.u8string(); static const auto pUtf8Text_ = str::c_str(utf8Text8); -static const auto strUtf8Xml8 = str::fromUtf8("") + utf8Text8 + str::fromUtf8(""); +static const auto strUtf8Xml8 = fromUtf8("") + utf8Text8 + fromUtf8(""); static const std::string strUtf8Xml = str::c_str(strUtf8Xml8); constexpr auto PlatformEncoding = xml::lite::PlatformEncoding; @@ -403,8 
+408,7 @@ TEST_CASE(testReadEmbeddedXml) std::u8string u8_characterData; classificationXML.getCharacterData(u8_characterData); TEST_ASSERT_EQ(u8_characterData, expectedCharDataView); - std::string u8_characterData_; - str::EncodedStringView(u8_characterData).toUtf8(u8_characterData_); + const auto u8_characterData_ = str::EncodedStringView(u8_characterData).asUtf8(); TEST_ASSERT_EQ(classificationText_utf_8, u8_characterData_); }