From 80ce9097f27155b94630fa65ed28b855f9f04cf3 Mon Sep 17 00:00:00 2001 From: "Robin E. R. Davies" Date: Wed, 27 Nov 2024 00:22:59 -0500 Subject: [PATCH] Dynamically load icu (Ubuntu) --- .vscode/launch.json | 3 +- .vscode/settings.json | 6 +- PiPedalCommon/src/CMakeLists.txt | 1 + PiPedalCommon/src/Utf8Utils.cpp | 340 ++++++++++++++++++++++++ PiPedalCommon/src/include/Utf8Utils.hpp | 50 ++++ docs/BuildPrerequisites.md | 2 +- src/ArmPerformanceCounters.hpp | 8 +- src/CMakeLists.txt | 8 +- src/Locale.cpp | 331 ++++++++++++++++++++--- src/Locale.hpp | 2 + src/LocaleTest.cpp | 285 ++++++++++++++++++++ 11 files changed, 992 insertions(+), 44 deletions(-) create mode 100644 PiPedalCommon/src/Utf8Utils.cpp create mode 100644 PiPedalCommon/src/include/Utf8Utils.hpp create mode 100644 src/LocaleTest.cpp diff --git a/.vscode/launch.json b/.vscode/launch.json index 53bc7b34..51dfddb1 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -138,7 +138,8 @@ //"[inverting_mutex_test]" // "[utf8_to_utf32]" //"[pipedal_alsa_test]" - "[wifi_channels_test]" + // "[wifi_channels_test]" + // "[locale]" ], "stopAtEntry": false, diff --git a/.vscode/settings.json b/.vscode/settings.json index 029cef12..66b150ea 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -98,7 +98,11 @@ "p2p_i.h": "c", "p2p.h": "c", "hash_set": "cpp", - "barrier": "cpp" + "barrier": "cpp", + "format": "cpp", + "locale": "cpp", + "stdfloat": "cpp", + "text_encoding": "cpp" }, "cSpell.words": [ "Guitarix", diff --git a/PiPedalCommon/src/CMakeLists.txt b/PiPedalCommon/src/CMakeLists.txt index 3926a61b..9ab01b68 100644 --- a/PiPedalCommon/src/CMakeLists.txt +++ b/PiPedalCommon/src/CMakeLists.txt @@ -57,6 +57,7 @@ add_library(PiPedalCommon STATIC include/dbus/org.freedesktop.NetworkManager.Device.Wireless.hpp include/dbus/org.freedesktop.NetworkManager.DHCP4Config.hpp + Utf8Utils.cpp include/Utf8Utils.hpp NetworkManagerInterfaces.cpp include/NetworkManagerInterfaces.hpp Lv2Log.cpp 
include/Lv2Log.hpp diff --git a/PiPedalCommon/src/Utf8Utils.cpp b/PiPedalCommon/src/Utf8Utils.cpp new file mode 100644 index 00000000..f9f85b93 --- /dev/null +++ b/PiPedalCommon/src/Utf8Utils.cpp @@ -0,0 +1,340 @@ +// Copyright (c) 2023 Robin E. R. Davies +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +// the Software, and to permit persons to whom the Software is furnished to do so, +// subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +// IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+
+#include "Utf8Utils.hpp"
+#include "stdexcept"
+#include <sstream>
+
+using namespace std;
+namespace pipedal
+{
+
+    // UTF-16 surrogate tags: lead units are 0xD800-0xDBFF, trail units are 0xDC00-0xDFFF.
+    static constexpr uint16_t UTF16_SURROGATE_TAG_MASK = 0xFC00;
+    static constexpr uint16_t UTF16_SURROGATE_1_BASE = 0xD800U;
+    static constexpr uint16_t UTF16_SURROGATE_2_BASE = 0xDC00U;
+
+    // UTF-8 tag bits: (byte & *_MASK) == *_BITS identifies lead bytes and continuation bytes.
+    static constexpr uint32_t UTF8_ONE_BYTE_MASK = 0x80;
+    static constexpr uint32_t UTF8_ONE_BYTE_BITS = 0;
+    static constexpr uint32_t UTF8_TWO_BYTES_MASK = 0xE0;
+    static constexpr uint32_t UTF8_TWO_BYTES_BITS = 0xC0;
+    static constexpr uint32_t UTF8_THREE_BYTES_MASK = 0xF0;
+    static constexpr uint32_t UTF8_THREE_BYTES_BITS = 0xE0;
+    static constexpr uint32_t UTF8_FOUR_BYTES_MASK = 0xF8;
+    static constexpr uint32_t UTF8_FOUR_BYTES_BITS = 0xF0;
+    static constexpr uint32_t UTF8_CONTINUATION_MASK = 0xC0;
+    static constexpr uint32_t UTF8_CONTINUATION_BITS = 0x80;
+    static constexpr bool enforceValidUtf8Encoding = false;
+
+    namespace implementation
+    {
+        void Utf8RangeError()
+        {
+            throw std::range_error("String index out of range.");
+        }
+    }
+    size_t Utf8Index(size_t size, const std::string &text) // byte offset of the size-th character, clamped to text.size().
+    {
+        size_t index = 0;
+        // Stop at the end of the text (this includes empty text) instead of
+        // letting Utf8Increment throw when the character count is too large.
+        for (size_t i = 0; i < size && index < text.size(); ++i)
+        {
+            index = Utf8Increment(index, text);
+        }
+        return index;
+    }
+    size_t Utf8Length(const std::string &text) // number of characters (lead bytes) in text.
+    {
+        size_t index = 0;
+        size_t length = 0;
+        while (index < text.length())
+        {
+            index = Utf8Increment(index, text);
+            ++length;
+        }
+        return length;
+    }
+    size_t Utf8Increment(size_t size, const std::string &text) // byte offset of the character after the one at 'size'; throws std::range_error at end of text.
+    {
+        if (size >= text.length())
+        {
+            implementation::Utf8RangeError();
+        }
+        uint8_t uc = (uint8_t)text[size];
+        if (uc < 0x80U)
+        {
+            return size + 1;
+        }
+        else
+        {
+            ++size;
+            while (size < text.size() && (((uint8_t)text[size]) & 0xC0) == 0x80)
+            {
+                ++size;
+            }
+            return size;
+        }
+    }
+    size_t Utf8Decrement(size_t size, const std::string &text) // byte offset of the character before 'size'; throws std::range_error if there is none.
+    {
+        if (size == 0)
+        {
+            implementation::Utf8RangeError();
+        }
+        uint8_t c = text[size - 1];
+        if (c < 0x80U)
+        {
+            return size - 1;
+        }
+        else
+        {
+            while (size != 0 && (uint8_t)(text[size - 1] & 0xC0) == 0x80) // size check: text may start with stray continuation bytes.
+            {
+                --size;
+            }
+            if (size != 0 && (uint8_t)(text[size - 1]) > 0x80)
+            {
+                --size;
+            }
+            else
+            {
+                implementation::Utf8RangeError();
+            }
+
+            return size;
+        }
+    }
+    // Returns text with the characters in [start, end) removed; positions are character indices, not byte offsets.
+    std::string Utf8Erase(const std::string &text, size_t start, size_t end)
+    {
+        size_t uStart = Utf8Index(start, text);
+        size_t uEnd = Utf8Index(end, text);
+        std::string result;
+        result.reserve(uStart + text.size() - uEnd);
+        result.append(text.begin(), text.begin() + uStart);
+        result.append(text.begin() + uEnd, text.end());
+        return result;
+    }
+    std::string Utf8Substring(const std::string &text, size_t start, size_t end) // characters [start, end); end == (size_t)-1 means "to end of text".
+    {
+        size_t uStart = Utf8Index(start, text);
+        size_t uEnd;
+        if (end == (size_t)-1)
+        {
+            uEnd = text.length();
+        }
+        else
+        {
+            uEnd = Utf8Index(end, text);
+        }
+        return text.substr(uStart, uEnd - uStart);
+    }
+    // Extracts nBits bits of c, starting at bit position 'shift'.
+    static char32_t bits(int nBits, int shift, char32_t c)
+    {
+        return (c >> shift) & ((1 << nBits) - 1);
+    }
+    std::string Utf8FromUtf32(char32_t value) // UTF-8 encoding of one code point; throws std::runtime_error above U+10FFFF.
+    {
+        std::stringstream ss;
+        if (value <= 0x7F) // U+007F is still a one-byte character; '<' produced the overlong pair C1 BF.
+        {
+            ss << (char)value;
+        }
+        else if (value <= 0x7FF)
+        {
+            ss << (char)(0xC0 + bits(5, 6, value));
+            ss << (char)(0x80 + bits(6, 0, value));
+        }
+        else if (value <= 0xFFFFul)
+        {
+            ss << (char)(0xE0 + bits(4, 12, value));
+            ss << (char)(0x80 + bits(6, 6, value));
+            ss << (char)(0x80 + bits(6, 0, value));
+        }
+        else if (value <= 0x10FFFFul)
+        {
+            ss << (char)(0xF0 + bits(3, 18, value));
+            ss << (char)(0x80 + bits(6, 12, value));
+            ss << (char)(0x80 + bits(6, 6, value));
+            ss << (char)(0x80 + bits(6, 0, value));
+        }
+        else
+        {
+            throw std::runtime_error("Invalid unicode character.");
+        }
+        return ss.str();
+    }
+    // Converts UTF-16 to UTF-8; throws std::runtime_error if a lead surrogate is not followed by a trail surrogate.
+    std::string Utf16ToUtf8(const std::u16string_view&v)
+    {
+        std::stringstream ss;
+        for (auto i = v.begin(); i != v.end(); /**/)
+        {
+            char32_t value = *i++;
+            if ((value & UTF16_SURROGATE_TAG_MASK) == UTF16_SURROGATE_1_BASE)
+            {
+                if (i == v.end())
+                {
+                    throw std::runtime_error("Invalid UTF-16 character sequence.");
+                }
+                char32_t value2 = *i++;
+                if ((value2 & UTF16_SURROGATE_TAG_MASK) != UTF16_SURROGATE_2_BASE) // the trail unit is what must be checked here.
+                {
+                    throw std::runtime_error("Invalid UTF-16 character sequence.");
+                }
+                value = 0x10000 + ((value - UTF16_SURROGATE_1_BASE) << 10) + (value2 - UTF16_SURROGATE_2_BASE);
+            }
+            // 'value' now holds a complete code point; encode it as UTF-8.
+            if (value <= 0x7F)
+            {
+                ss << (char)value;
+            }
+            else if (value <= 0x7FF)
+            {
+                ss << (char)(0xC0 + bits(5, 6, value));
+                ss << (char)(0x80 + bits(6, 0, value));
+            }
+            else if (value <= 0xFFFFul)
+            {
+                ss << (char)(0xE0 + bits(4, 12, value));
+                ss << (char)(0x80 + bits(6, 6, value));
+                ss << (char)(0x80 + bits(6, 0, value));
+            }
+            else if (value <= 0x10FFFFul)
+            {
+                ss << (char)(0xF0 + bits(3, 18, value));
+                ss << (char)(0x80 + bits(6, 12, value));
+                ss << (char)(0x80 + bits(6, 6, value));
+                ss << (char)(0x80 + bits(6, 0, value));
+            }
+            else
+            {
+                throw std::runtime_error("Invalid unicode character.");
+            }
+        }
+        return ss.str();
+    }
+
+
+    [[noreturn]] static void throw_encoding_error()
+    {
+        throw std::runtime_error("Invalid UTF8 character.");
+    }
+    // Reads one continuation byte at p (advancing p) and returns its 6 payload bits; throws at end of input or on a non-continuation byte.
+    static uint32_t continuation_byte(std::string_view::iterator &p, std::string_view::const_iterator end)
+    {
+        if (p == end)
+            throw_encoding_error();
+        uint8_t c = *p++;
+        if ((c & UTF8_CONTINUATION_MASK) != UTF8_CONTINUATION_BITS)
+            throw_encoding_error();
+        return c & 0x3F;
+    }
+    // Converts UTF-8 to UTF-16; throws std::runtime_error on truncated or malformed sequences.
+    std::u16string Utf8ToUtf16(const std::string_view &v)
+    {
+        std::u16string os;
+        // convert to utf-32.
+        // convert utf-32 to normalized utf-16.
+        // code units are appended directly to the result string.
+
+        auto p = v.begin();
+        while (p != v.end())
+        {
+            uint32_t uc;
+            uint8_t c = (uint8_t)*p++;
+            if ((c & UTF8_ONE_BYTE_MASK) == UTF8_ONE_BYTE_BITS)
+            {
+                uc = c;
+            }
+            else
+            {
+                uint32_t c2 = continuation_byte(p, v.end());
+
+                if ((c & UTF8_TWO_BYTES_MASK) == UTF8_TWO_BYTES_BITS)
+                {
+                    uint32_t c1 = c & (uint32_t)(~UTF8_TWO_BYTES_MASK);
+                    if (c1 <= 1 && enforceValidUtf8Encoding)
+                    {
+                        // overlong encoding.
+                        throw_encoding_error();
+                    }
+                    uc = (c1 << 6) | c2;
+                }
+                else
+                {
+                    uint32_t c3 = continuation_byte(p, v.end());
+
+                    if ((c & UTF8_THREE_BYTES_MASK) == UTF8_THREE_BYTES_BITS)
+                    {
+                        uint32_t c1 = c & (uint32_t)~UTF8_THREE_BYTES_MASK;
+                        if (c1 == 0 && c2 < 0x20 && enforceValidUtf8Encoding)
+                        {
+                            // overlong encoding.
+                            throw_encoding_error();
+                        }
+
+                        uc = (c1) << 12 | (c2 << 6) | c3;
+                    }
+                    else
+                    {
+                        uint32_t c4 = continuation_byte(p, v.end());
+                        if ((c & UTF8_FOUR_BYTES_MASK) == UTF8_FOUR_BYTES_BITS)
+                        {
+                            uint32_t c1 = c & (uint32_t)~UTF8_FOUR_BYTES_MASK;
+                            if (c1 == 0 && c2 < 0x10 && enforceValidUtf8Encoding)
+                            {
+                                // overlong encoding.
+                                throw_encoding_error();
+                            }
+                            uc = (c1 << 18) | (c2 << 12) | (c3 << 6) | c4;
+                        }
+                        else
+                        {
+                            // outside legal UCS range.
+                            throw_encoding_error();
+                        }
+                    }
+                }
+            }
+            if (uc < 0x10000ul)
+            {
+                os.push_back((char16_t)uc);
+            }
+            else
+            {
+                // write UTF-16 surrogate pair.
+                uc -= 0x10000;
+
+                char16_t s1 = (char16_t)(UTF16_SURROGATE_1_BASE + ((uc >> 10) & 0x3FFu));
+                char16_t s2 = (char16_t)(UTF16_SURROGATE_2_BASE + (uc & 0x03FFu));
+                // surrogate pair.
+                os.push_back(s1);
+                os.push_back(s2);
+            }
+        }
+        return os;
+    }
+
+
+
+}
\ No newline at end of file
diff --git a/PiPedalCommon/src/include/Utf8Utils.hpp b/PiPedalCommon/src/include/Utf8Utils.hpp
new file mode 100644
index 00000000..5b31af4b
--- /dev/null
+++ b/PiPedalCommon/src/include/Utf8Utils.hpp
@@ -0,0 +1,50 @@
+// Copyright (c) 2023 Robin E. R.
Davies +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +// the Software, and to permit persons to whom the Software is furnished to do so, +// subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +// IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +#pragma once + + +#include +#include +#include + +namespace pipedal { + size_t Utf8Index(size_t size, const std::string &text); + size_t Utf8Decrement(size_t size, const std::string &text); + size_t Utf8Increment(size_t size, const std::string &text); + size_t Utf8Length(const std::string&text); + + std::string Utf8Erase(const std::string&text, size_t start, size_t end); + std::string Utf8Substring(const std::string&text, size_t start, size_t end); + inline std::string Utf8Substring(const std::string&text, size_t start) { + return Utf8Substring(text,start,(size_t)-1); + } + namespace implementation { + void Utf8RangeError(); + } + + std::string Utf8FromUtf32(char32_t value); + + + std::u16string Utf8ToUtf16(const std::string_view&s); + std::string Utf16ToUtf8(const std::u16string_view&s); + + +} + diff --git a/docs/BuildPrerequisites.md b/docs/BuildPrerequisites.md index 8dfed233..40adbc50 100644 --- a/docs/BuildPrerequisites.md +++ b/docs/BuildPrerequisites.md @@ -16,7 +16,7 @@ with # install NodeJS latest LTS release. curl -fsSL https://deb.nodesource.com/setup_20.x | sudo -E bash - - sudo apt-get install -y nodejs + sudo apt-get install -y nodejs npm Run the following commands to install dependent libraries required by the PiPedal build. 
diff --git a/src/ArmPerformanceCounters.hpp b/src/ArmPerformanceCounters.hpp index 8437dc84..335139eb 100644 --- a/src/ArmPerformanceCounters.hpp +++ b/src/ArmPerformanceCounters.hpp @@ -100,8 +100,8 @@ namespace pipedal long long get_l1_misses() { - long long count; - read(fd_l1, &count, sizeof(long long)); + long long count = 0; + auto _ = read(fd_l1, &count, sizeof(long long)); return count; } @@ -111,8 +111,8 @@ namespace pipedal { return -1; } - long long count; - read(fd_l2, &count, sizeof(long long)); + long long count = 0; + auto _ = read(fd_l2, &count, sizeof(long long)); return count; } }; diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e0a769d9..325ee997 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -7,7 +7,7 @@ set (USE_PCH 1) set (ENABLE_BACKTRACE 1) -set (USE_SANITIZE true) +set (USE_SANITIZE OFF) # seems to be broken on Ubuntu 24.10 set(CXX_STANDARD 20) @@ -284,7 +284,6 @@ set(PIPEDAL_LIBS libpipedald zip ${VST3_LIBRARIES} ${LILV_0_LIBRARIES} # ${JACK_LIBRARIES} - pending delete for JACK support. - ${ICU_LIBRARIES} ) @@ -332,7 +331,6 @@ set_target_properties(hotspotManagerTest PROPERTIES EXCLUDE_FROM_ALL true) add_executable(pipedaltest testMain.cpp - InvertingMutexTest.cpp jsonTest.cpp UpdaterTest.cpp @@ -344,6 +342,8 @@ add_executable(pipedaltest testMain.cpp PiPedalAlsaTest.cpp UnixSocketTest.cpp + LocaleTest.cpp + Lv2HostLeakTest.cpp @@ -353,7 +353,7 @@ add_executable(pipedaltest testMain.cpp MemDebug.cpp MemDebug.hpp ) -target_link_libraries(pipedaltest PRIVATE ${PIPEDAL_LIBS}) +target_link_libraries(pipedaltest PRIVATE ${PIPEDAL_LIBS} ${ICU_LIBRARIES}) target_include_directories(pipedaltest PRIVATE ${PIPEDAL_INCLUDES} ) diff --git a/src/Locale.cpp b/src/Locale.cpp index a3f353f9..97121505 100644 --- a/src/Locale.cpp +++ b/src/Locale.cpp @@ -17,9 +17,23 @@ // IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN // CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+/* + libicu does major version bumps without regard to whether api changes are major or minor. Documentation + does, in fact, guarantee binary compatibility for the APIs that we are using. The major version bump + creates absolute havoc with .deb packaging, because the major version bumps on roughly a monthly basis. + This makes it impossible to generate .deb packages that are plausibly portable between debian-derived distros. + + The alternative: dynamically link to the APIs that we are using, referencing non-major-versioned references + to the libicu dlls (e.g. libicuuc.so instead of libicuuc.so.74). Given how few APIs we are actually using, + this seems perfectly reasonable. fwiw, .net runtime uses a similar approach. + +*/ + #include "pch.h" #include "Locale.hpp" - +#include "ss.hpp" +#include +#include "Utf8Utils.hpp" #define U_SHOW_CPLUSPLUS_API 0 @@ -28,30 +42,244 @@ #include #include #include +#include #include #include "ss.hpp" #include +#include using namespace pipedal; +using namespace std; + +// Function to get ICU version dynamically +int getICUVersion(void* libHandle) { + + // parse the dlerror to get the version number. :-/ + dlerror(); // clear the error. 
+ void * nonExistentFunction = dlsym(libHandle, "nonExistentFunction"); + + std::string error = dlerror(); + // "/lib/aarch64-linux-gnu/libicui18n.so.74: undefined symbol: nonExistentFunction" + + auto nPos = error.find(".so."); + if (nPos == std::string::npos) + { + throw std::runtime_error("Unable to determine version of libicui18n.so"); + } + const char *p = error.c_str() + nPos + 4; + + int version = 0; + while (*p >= '0' && *p <= '9') + { + version = version*10 + *p-'0'; + ++p; + } + if (*p != ':' && *p != '.') + { + throw std::runtime_error("Unable to determine version of libicui18n.so"); + } + return version; +} + + +class DynamicIcuLoader +{ +public: + using ptr = std::shared_ptr; + // Function pointer types + typedef UCollator *(*ucol_open_t)(const char *, UErrorCode *); + typedef void (*ucol_close_t)(UCollator *); + typedef UCollationResult (*ucol_strcoll_t)(const UCollator *, const UChar *, int32_t, const UChar *, int32_t); + using ucol_setStrength_t = typeof(&ucol_setStrength); + + static ptr icuLoader; + static std::mutex icuLoaderMutex; + + static DynamicIcuLoader::ptr get_instance() + { + std::lock_guard lock{icuLoaderMutex}; + + if (!icuLoader) + { + icuLoader = std::make_shared(); + } + return icuLoader; + } + + ~DynamicIcuLoader() + { + // Close the library if it was opened + if (library_handle) + { + dlclose(library_handle); + library_handle = nullptr; + } + } + + ucol_open_t ucol_open_fn; + ucol_close_t ucol_close_fn; + ucol_strcoll_t ucol_strcoll_fn; + ucol_setStrength_t ucol_setStrength_fn; + + DynamicIcuLoader() : library_handle(nullptr), + ucol_open_fn(nullptr), + ucol_close_fn(nullptr), + ucol_strcoll_fn(nullptr) + { +#ifndef DISABLE_DYNAMIC_ICU_LOADER + load(); +#else + this->ucol_open_fn = &ucol_open; + this->ucol_close_fn = &ucol_close; + this->ucol_strcoll_fn = &ucol_strcoll; + this->ucol_setStrength_fn = &ucol_setStrength; +#endif + } + +private: + void *library_handle; + // Function pointers + static std::string VersionedName(const 
char*name,int version) + { + std::stringstream ss; + ss << name << "_" << version; + return ss.str(); + } + void load() + { + try + { + // Open the library + library_handle = dlopen("libicui18n.so", RTLD_LAZY); + if (!library_handle) + { + throw std::runtime_error(SS("Error loading library: " << dlerror())); + } + + + int version = getICUVersion(library_handle); + + // Clear any existing errors + dlerror(); + + // Load ucol_open + ucol_open_fn = reinterpret_cast(dlsym(library_handle, VersionedName("ucol_open",version).c_str())); + if (!ucol_open_fn) + { + + throw std::runtime_error(SS("Error loading ucol_open: " << dlerror())); + } + + // Load ucol_close + ucol_close_fn = reinterpret_cast(dlsym(library_handle, VersionedName("ucol_close",version).c_str())); + if (!ucol_close_fn) + { + throw std::runtime_error(SS("Error loading ucol_close: " << dlerror())); + } + + // Load ucol_strcoll + ucol_strcoll_fn = reinterpret_cast(dlsym(library_handle, VersionedName("ucol_strcoll",version).c_str())); + if (!ucol_strcoll_fn) + { + throw std::runtime_error(SS("Error loading ucol_strcoll: " << dlerror())); + } + this->ucol_setStrength_fn = reinterpret_cast(dlsym(library_handle, VersionedName("ucol_setStrength",version).c_str())); + if (!ucol_setStrength_fn) + { + throw std::runtime_error(SS("Error loading ucol_setStrength: " << dlerror())); + } + } + catch (const std::exception &e) + { + Lv2Log::warning(e.what()); + this->ucol_open_fn = fallback_ucol_open_func; + this->ucol_close_fn = fallback_ucol_close_func; + this->ucol_strcoll_fn = fallback_ucol_strcoll_func; + this->ucol_setStrength_fn = fallback_ucol_setStrength_func; + } + } + + static void fallback_ucol_setStrength_func(UCollator *coll, + UCollationStrength strength) + { + + } + + + static UCollator *fallback_ucol_open_func(const char *locale, UErrorCode *ec) + { + *ec = UErrorCode::U_ZERO_ERROR; + return (UCollator *)(void *)-1; + } + static void fallback_ucol_close_func(UCollator *) + { + } + static UCollationResult 
fallback_ucol_strcoll_func(const UCollator *, const UChar *left, int32_t nLeft, const UChar *right, int32_t nRight) + { + auto c = std::min(nLeft, nRight); + + for (int32_t i = 0; i < c; ++i) + { + UChar cl = *left++; + UChar cr = *right++; + if (cl != cr) + { + if (cl >= 'A' && cl <= 'Z') + { + cl += 'A' - 'a'; + } + if (cr >= 'A' && cr <= 'Z') + { + cr += 'A' - 'a'; + } + if (cl != cr) + { + if (cl < cr) + { + return UCollationResult::UCOL_LESS; + } + else + { + return UCollationResult::UCOL_GREATER; + } + } + } + } + return UCollationResult::UCOL_EQUAL; + } +}; -std::string getCurrentLocale() { +DynamicIcuLoader::ptr DynamicIcuLoader::icuLoader; +std::mutex DynamicIcuLoader::icuLoaderMutex; + +std::string getCurrentLocale() +{ std::string locale = setlocale(LC_ALL, nullptr); - if (locale.empty() || locale == "C") { + if (locale.empty() || locale == "C") + { // If setlocale fails, try getting it from environment variables - const char* lang = getenv("LC_ALL"); - if (lang) { + const char *lang = getenv("LC_ALL"); + if (lang) + { locale = lang; - } else { + } + else + { // If LANG is not set, fall back to a default lang = getenv("LC_COLLATE"); if (lang) { locale = lang; - } else{ + } + else + { lang = getenv("LANG"); - if (lang) { + if (lang) + { locale = lang; - } else { + } + else + { locale = "en_US"; } } @@ -59,61 +287,87 @@ std::string getCurrentLocale() { } // Extract just the language and country code size_t dot_pos = locale.find('.'); - if (dot_pos != std::string::npos) { + if (dot_pos != std::string::npos) + { locale = locale.substr(0, dot_pos); } return locale; } -Collator::~Collator() { +Collator::~Collator() +{ } class LocaleImpl; -class CollatorImpl : public Collator { +class CollatorImpl : public Collator +{ public: - CollatorImpl(std::shared_ptr localeImpl, const char* localeStr); + CollatorImpl(std::shared_ptr localeImpl, const char *localeStr); ~CollatorImpl(); - virtual int Compare(const std::string &left, const std::string&right); + virtual int 
Compare(const std::string &left, const std::string &right); + virtual int Compare(const std::u16string &left, const std::u16string &right); + private: - UCollator* collator = nullptr; + UCollator *collator = nullptr; std::shared_ptr localeImpl; + + DynamicIcuLoader::ptr icuLoader; }; CollatorImpl::~CollatorImpl() { - if (collator) { - ucol_close(collator); + if (collator) + { + icuLoader->ucol_close_fn(collator); + collator = nullptr; } localeImpl = nullptr; } -CollatorImpl::CollatorImpl(std::shared_ptr localeImpl, const char* localeStr) -: localeImpl(localeImpl) +CollatorImpl::CollatorImpl(std::shared_ptr localeImpl, const char *localeStr) + : localeImpl(localeImpl) { + icuLoader = DynamicIcuLoader::get_instance(); UErrorCode status = U_ZERO_ERROR; - collator = ucol_open(localeStr, &status); + collator = icuLoader->ucol_open_fn(localeStr, &status); - if (U_FAILURE(status)) { - throw std::runtime_error(SS("Failed to create collator: " << u_errorName(status))); + if (U_FAILURE(status)) + { + throw std::runtime_error(SS("Failed to create collator: " << status)); } + icuLoader->ucol_setStrength_fn(collator,UCollationStrength::UCOL_PRIMARY); } -int CollatorImpl::Compare(const std::string &left, const std::string&right) { - UErrorCode status = U_ZERO_ERROR; - return ucol_strcoll(collator, - reinterpret_cast(left.c_str()), left.length(), - reinterpret_cast(right.c_str()), right.length()); +int CollatorImpl::Compare(const std::u16string &left, const std::u16string&right) +{ + return icuLoader->ucol_strcoll_fn(collator, + reinterpret_cast(left.c_str()), left.length(), + reinterpret_cast(right.c_str()), right.length()); + +} + +int CollatorImpl::Compare(const std::string &left_, const std::string &right_) +{ + std::u16string left = Utf8ToUtf16(left_); + std::u16string right = Utf8ToUtf16(right_); + + return icuLoader->ucol_strcoll_fn(collator, + reinterpret_cast(left.c_str()), left.length(), + reinterpret_cast(right.c_str()), right.length()); } Locale::ptr g_instance; 
-class LocaleImpl: public Locale, public std::enable_shared_from_this { +class LocaleImpl : public Locale, public std::enable_shared_from_this +{ public: LocaleImpl(); - virtual const std::string &CurrentLocale() const ; + LocaleImpl(const std::string&locale); + virtual const std::string &CurrentLocale() const; virtual Collator::ptr GetCollator(); + private: std::string currentLocale; }; @@ -122,20 +376,26 @@ LocaleImpl::LocaleImpl() { currentLocale = getCurrentLocale(); } +LocaleImpl::LocaleImpl(const std::string& locale) +{ + currentLocale = locale; +} + -const std::string &LocaleImpl::CurrentLocale() const +const std::string &LocaleImpl::CurrentLocale() const { return currentLocale; } -Collator::ptr LocaleImpl::GetCollator(){ +Collator::ptr LocaleImpl::GetCollator() +{ auto pThis = shared_from_this(); return std::shared_ptr(new CollatorImpl(pThis, currentLocale.c_str())); } static std::mutex createMutex; -Locale::~Locale() +Locale::~Locale() { } @@ -143,7 +403,7 @@ Locale::ptr Locale::g_instance; Locale::ptr Locale::GetInstance() { - std::lock_guard lock { createMutex}; + std::lock_guard lock{createMutex}; if (g_instance) { @@ -151,4 +411,9 @@ Locale::ptr Locale::GetInstance() } g_instance = std::make_shared(); return g_instance; +} + +Locale::ptr Locale::GetTestInstance(const std::string&locale) +{ + return std::make_shared(locale); } \ No newline at end of file diff --git a/src/Locale.hpp b/src/Locale.hpp index 9d0ae1b8..068b4d8b 100644 --- a/src/Locale.hpp +++ b/src/Locale.hpp @@ -27,6 +27,7 @@ namespace pipedal { public: using ptr = std::shared_ptr; virtual int Compare(const std::string &left, const std::string&right) = 0; + virtual int Compare(const std::u16string &left, const std::u16string&right) = 0; virtual ~Collator(); }; class Locale { @@ -43,6 +44,7 @@ namespace pipedal { using ptr = std::shared_ptr; static ptr GetInstance(); + static ptr GetTestInstance(const std::string&locale); // testing only. 
virtual const std::string &CurrentLocale() const = 0; virtual Collator::ptr GetCollator() = 0; diff --git a/src/LocaleTest.cpp b/src/LocaleTest.cpp new file mode 100644 index 00000000..1ba36782 --- /dev/null +++ b/src/LocaleTest.cpp @@ -0,0 +1,285 @@ +// Copyright (c) 2022 Robin Davies +// +// Permission is hereby granted, free of charge, to any person obtaining a copy of +// this software and associated documentation files (the "Software"), to deal in +// the Software without restriction, including without limitation the rights to +// use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +// the Software, and to permit persons to whom the Software is furnished to do so, +// subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +// IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +#include "pch.h" +#include "catch.hpp" +#include +#include + +#include // before Locale.hpp +#include +#include +#include + +#include "Locale.hpp" +#include +#include +#include + +using namespace pipedal; +using namespace std; + + +static std::string ToUtf8(const icu::UnicodeString &value) +{ + std::string result; + icu::StringByteSink sink(&result); + value.toUTF8(sink); + + sink.Flush(); + return result; +} + + +class LocaleEnumerator { +public: + // Enumerate all available locales + static std::vector getAllLocales() { + std::vector locales; + + // Get the number of available locales + int32_t count = 0; + const icu::Locale* availableLocales = icu::Locale::getAvailableLocales(count); + + for (int32_t i = 0; i < count; ++i) { + const icu::Locale& locale = availableLocales[i]; + + // Construct full locale name + std::string localeName = constructLocaleName(locale); + + locales.push_back(localeName); + } + + // Sort locales alphabetically + std::sort(locales.begin(), locales.end()); + + return locales; + } + + // Detailed locale information + static void printLocaleDetails(const std::string& localeName) { + try { + icu::Locale locale(localeName.c_str()); + + // Display locale information + std::cout << "Locale: " << localeName << "\n"; + + // Language display name + icu::UnicodeString languageName; + locale.getDisplayLanguage(locale, languageName); + std::cout << " Language: " << ToUtf8(languageName) << "\n"; + + // Country display name + icu::UnicodeString countryName; + locale.getDisplayCountry(locale, countryName); + std::cout << " Country: " << ToUtf8(countryName) << "\n"; + + // Variant information + icu::UnicodeString variantName; + locale.getDisplayVariant(locale, variantName); + if (!variantName.isEmpty()) { + std::cout << " Variant: " << ToUtf8(variantName) << "\n"; + } + UErrorCode status = U_ZERO_ERROR; + + std::unique_ptr collator(icu::Collator::createInstance(localeName.c_str(),status)); + icu::Collator::ECollationStrength strength = 
collator->getStrength(); + std::cout << " Default strength: " << strength << "\n"; + + } + catch (const std::exception& e) { + std::cerr << "Error processing locale " << localeName << ": " << e.what() << std::endl; + } + } + + // Enumerate locales by language or country + static std::vector getLocalesByLanguage(const std::string& language) { + std::vector matchingLocales; + + int32_t count = 0; + const icu::Locale* availableLocales = icu::Locale::getAvailableLocales(count); + + for (int32_t i = 0; i < count; ++i) { + const icu::Locale& locale = availableLocales[i]; + + if (language == locale.getLanguage()) { + std::string localeName = constructLocaleName(locale); + matchingLocales.push_back(localeName); + } + } + + std::sort(matchingLocales.begin(), matchingLocales.end()); + return matchingLocales; + } + +private: + // Construct a comprehensive locale name + static std::string constructLocaleName(const icu::Locale& locale) { + std::string localeName; + + const char* lang = locale.getLanguage(); + const char* country = locale.getCountry(); + const char* variant = locale.getVariant(); + + if (lang && *lang) { + localeName += lang; + + if (country && *country) { + localeName += "_"; + localeName += country; + + if (variant && *variant) { + localeName += "."; + localeName += variant; + } + } + } + + return localeName; + } +}; + +int enumerateLocales() { + try { + // Get and print total number of available locales + int32_t count = 0; + icu::Locale::getAvailableLocales(count); + std::cout << "Total Available Locales: " << count << "\n\n"; + + // Demonstrate different locale enumeration methods + + // 1. Get all locales + std::vector allLocales = LocaleEnumerator::getAllLocales(); + for (size_t i = 0; i < allLocales.size(); ++i) { + std::cout << allLocales[i] << "\n"; + } + + // 2. 
Get locales by specific language (e.g., English) + std::cout << "\nFrench Locales:\n"; + std::vector englishLocales = LocaleEnumerator::getLocalesByLanguage("fr"); + for (const auto& locale : englishLocales) { + std::cout << locale << "\n"; + } + + // 3. Detailed information for a specific locale + std::cout << "\nDetailed Locale Information:\n"; + LocaleEnumerator::printLocaleDetails("en_US"); + } + catch (const std::exception& e) { + std::cerr << "Error: " << e.what() << std::endl; + return 1; + } + + return 0; +} + +void printSortOptions(const std::string&localeName) +{ + UErrorCode status = U_ZERO_ERROR; + std::unique_ptr collator(icu::Collator::createInstance(localeName.c_str(),status)); + +} + +void u16Test() { + { + Locale::ptr locale = Locale::GetTestInstance("en_US"); + REQUIRE(locale->GetCollator()->Compare(u"ÄbcAe",u"Abcde") < 0); // standard sort order, not phone-book sort order. + REQUIRE(locale->GetCollator()->Compare(u"Äbcdefg",u"Abcde") > 0); // standard sort order, not phone-book sort order. + REQUIRE(locale->GetCollator()->Compare(u"Ä",u"A") == 0); // standard sort order, not phone-book sort order. + REQUIRE(locale->GetCollator()->Compare(u"A",u"b") < 0); + REQUIRE(locale->GetCollator()->Compare(u"B",u"a") > 0); + REQUIRE(locale->GetCollator()->Compare(u"A",u"a") == 0); + REQUIRE(locale->GetCollator()->Compare(u"c",u"ç") == 0); + REQUIRE(locale->GetCollator()->Compare(u"e",u"è") == 0); + + } + { + Locale::ptr locale = Locale::GetTestInstance("fr_FR"); + REQUIRE(locale->GetCollator()->Compare(u"Ä",u"A") == 0); // standard sort order, not phone-book sort order. 
+ REQUIRE(locale->GetCollator()->Compare(u"A",u"b") < 0); + REQUIRE(locale->GetCollator()->Compare(u"B",u"a") > 0); + REQUIRE(locale->GetCollator()->Compare(u"A",u"a") == 0); + REQUIRE(locale->GetCollator()->Compare(u"c",u"ç") == 0); + REQUIRE(locale->GetCollator()->Compare(u"e",u"è") == 0); + } + { + Locale::ptr locale = Locale::GetTestInstance("de_DE"); + REQUIRE(locale->GetCollator()->Compare(u"Ä",u"A") == 0); // standard sort order, not phone-book sort order. + REQUIRE(locale->GetCollator()->Compare(u"A",u"b") < 0); + REQUIRE(locale->GetCollator()->Compare(u"B",u"a") > 0); + REQUIRE(locale->GetCollator()->Compare(u"A",u"a") == 0); + REQUIRE(locale->GetCollator()->Compare(u"c",u"ç") == 0); + REQUIRE(locale->GetCollator()->Compare(u"e",u"è") == 0); + } + { + Locale::ptr locale = Locale::GetTestInstance("da_DK"); + REQUIRE(locale->GetCollator()->Compare(u"Å",u"Z") > 0); + } +} + +void u8Test() { + { + Locale::ptr locale = Locale::GetTestInstance("en_US"); + REQUIRE(locale->GetCollator()->Compare("Äbcae","Abcde") < 0); // standard sort order, not phone-book sort order. + + REQUIRE(locale->GetCollator()->Compare("Ä","A") == 0); // standard sort order, not phone-book sort order. + REQUIRE(locale->GetCollator()->Compare("A","b") < 0); + REQUIRE(locale->GetCollator()->Compare("B","a") > 0); + REQUIRE(locale->GetCollator()->Compare("A","a") == 0); + REQUIRE(locale->GetCollator()->Compare("c","ç") == 0); + REQUIRE(locale->GetCollator()->Compare("e","è") == 0); + + } + { + Locale::ptr locale = Locale::GetTestInstance("fr_FR"); + REQUIRE(locale->GetCollator()->Compare("Ä","A") == 0); // standard sort order, not phone-book sort order. 
+ REQUIRE(locale->GetCollator()->Compare("A","b") < 0); + REQUIRE(locale->GetCollator()->Compare("B","a") > 0); + REQUIRE(locale->GetCollator()->Compare("A","a") == 0); + REQUIRE(locale->GetCollator()->Compare("c","ç") == 0); + REQUIRE(locale->GetCollator()->Compare("e","è") == 0); + } + { + Locale::ptr locale = Locale::GetTestInstance("de_DE"); + REQUIRE(locale->GetCollator()->Compare("Ä","A") == 0); // standard sort order, not phone-book sort order. + REQUIRE(locale->GetCollator()->Compare("A","b") < 0); + REQUIRE(locale->GetCollator()->Compare("B","a") > 0); + REQUIRE(locale->GetCollator()->Compare("A","a") == 0); + REQUIRE(locale->GetCollator()->Compare("c","ç") == 0); + REQUIRE(locale->GetCollator()->Compare("e","è") == 0); + } + { + Locale::ptr locale = Locale::GetTestInstance("da_DK"); + REQUIRE(locale->GetCollator()->Compare("Å","Z") > 0); + } +} + +TEST_CASE( "Locale test", "[locale]" ) { + cout << "====== Locale test ===============" << endl; + + + // enumerateLocales(); + // printSortOptions("de_DE"); + + u8Test(); + u16Test(); + + + +} \ No newline at end of file