From d90b4d25fa38a615daebab30fe971b02b83aa67a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Golonka?= Date: Tue, 17 Aug 2021 12:47:49 +0200 Subject: [PATCH 01/13] Add various wrappers around `GetLocaleInfoEx` and improve locale normalization for Win32 functions --- source/languageHandler.py | 66 +++++++++++++++++++++++++++++- tests/unit/test_languageHandler.py | 64 +++++++++++++++++++++++++++++ 2 files changed, 129 insertions(+), 1 deletion(-) diff --git a/source/languageHandler.py b/source/languageHandler.py index 5f16fabe9fe..1cfe340980a 100644 --- a/source/languageHandler.py +++ b/source/languageHandler.py @@ -15,6 +15,7 @@ import gettext import globalVars from logHandler import log +import winKernel from typing import Optional #a few Windows locale constants @@ -23,6 +24,12 @@ LOCALE_SLANGDISPLAYNAME=0x6f LOCALE_USER_DEFAULT = 0x400 LOCALE_CUSTOM_UNSPECIFIED = 0x1000 +LOCALE_SENGLISHLANGUAGENAME = 0x00001001 +LOCALE_SENGLISHCOUNTRYNAME = 0x00001002 +LOCALE_IDEFAULTANSICODEPAGE = 0x00001004 + +CP_ACP = "0" + #: Returned from L{localeNameToWindowsLCID} when the locale name cannot be mapped to a locale identifier. #: This might be because Windows doesn't know about the locale (e.g. "an"), #: because it is not a standardized locale name anywhere (e.g. 
"zz") @@ -31,6 +38,23 @@ curLang="en" + +def isNormalizedWin32Locale(localeName: str) -> bool: + hyphensCount = localeName.count("-") + underscoresCount = localeName.count("_") + if not hyphensCount and not underscoresCount: + return True + if hyphensCount: + return True + return False + + +def normalizeLocaleForWin32(localeName: str) -> str: + if not isNormalizedWin32Locale(localeName): + localeName = localeName.replace('_', '-', 1) + return localeName + + def localeNameToWindowsLCID(localeName): """Retreave the Windows locale identifier (LCID) for the given locale name @param localeName: a string of 2letterLanguage_2letterCountry or just language (2letterLanguage or 3letterLanguage) @@ -40,7 +64,7 @@ def localeNameToWindowsLCID(localeName): """ # Windows Vista (NT 6.0) and later is able to convert locale names to LCIDs. # Because NVDA supports Windows 7 (NT 6.1) SP1 and later, just use it directly. - localeName=localeName.replace('_','-') + localeName = normalizeLocaleForWin32(localeName) LCID=ctypes.windll.kernel32.LocaleNameToLCID(localeName,0) # #6259: In Windows 10, LOCALE_CUSTOM_UNSPECIFIED is returned for any locale name unknown to Windows. # This was observed for Aragonese ("an"). 
@@ -93,6 +117,46 @@ def getLanguageDescription(language): }.get(language,None) return desc + +def englishLanguageNameFromNVDALocale(localeName: str) -> Optional[str]: + """Returns either English name of the given language using `GetLocaleInfoEx` or None + if the given locale is not known to Windows.""" + localeName = normalizeLocaleForWin32(localeName) + buffLength = winKernel.kernel32.GetLocaleInfoEx(localeName, LOCALE_SENGLISHLANGUAGENAME, None, 0) + if buffLength: + buf = ctypes.create_unicode_buffer(buffLength) + winKernel.kernel32.GetLocaleInfoEx(localeName, LOCALE_SENGLISHLANGUAGENAME, buf, buffLength) + return buf.value + return None + + +def englishCountryNameFromNVDALocale(localeName: str) -> Optional[str]: + """Returns either English name of the given country using GetLocaleInfoEx or None + if the given locale is not known to Windows.""" + localeName = normalizeLocaleForWin32(localeName) + buffLength = winKernel.kernel32.GetLocaleInfoEx(localeName, LOCALE_SENGLISHCOUNTRYNAME, None, 0) + if buffLength: + buf = ctypes.create_unicode_buffer(buffLength) + winKernel.kernel32.GetLocaleInfoEx(localeName, LOCALE_SENGLISHCOUNTRYNAME, buf, buffLength) + return buf.value + return None + + +def ansiCodePageFromNVDALocale(localeName: str) -> Optional[str]: + """Returns either English name of the given country using GetLocaleInfoEx or None + if the given locale is not known to Windows.""" + localeName = normalizeLocaleForWin32(localeName) + buffLength = winKernel.kernel32.GetLocaleInfoEx(localeName, LOCALE_IDEFAULTANSICODEPAGE, None, 0) + if buffLength: + buf = ctypes.create_unicode_buffer(buffLength) + winKernel.kernel32.GetLocaleInfoEx(localeName, LOCALE_IDEFAULTANSICODEPAGE, buf, buffLength) + codePage = buf.value + if codePage == CP_ACP: + codePage = str(winKernel.kernel32.GetACP()) + return codePage + return None + + def getAvailableLanguages(presentational=False): """generates a list of locale names, plus their full localized language and country names. 
@param presentational: whether this is meant to be shown alphabetically by language description diff --git a/tests/unit/test_languageHandler.py b/tests/unit/test_languageHandler.py index 149f7e4b9ac..c387d843189 100644 --- a/tests/unit/test_languageHandler.py +++ b/tests/unit/test_languageHandler.py @@ -45,6 +45,70 @@ def test_invalidLocale(self): self.assertEqual(lcid, LCID_NONE) +class Test_Normalization(unittest.TestCase): + + def test_isNormalizedWin32LocaleNormalizedLocale(self): + self.assertTrue(languageHandler.isNormalizedWin32Locale("en")) + self.assertTrue(languageHandler.isNormalizedWin32Locale("ro")) + self.assertTrue(languageHandler.isNormalizedWin32Locale("so")) + self.assertTrue(languageHandler.isNormalizedWin32Locale("ckb")) + self.assertTrue(languageHandler.isNormalizedWin32Locale("de-CH")) + self.assertTrue(languageHandler.isNormalizedWin32Locale("pl-PL")) + self.assertTrue(languageHandler.isNormalizedWin32Locale("de-DE_phoneb")) + self.assertTrue(languageHandler.isNormalizedWin32Locale("mn-Mong-CN")) + + def test_isNormalizedWin32LocaleInvalidLocales(self): + self.assertFalse(languageHandler.isNormalizedWin32Locale("pl_PL")) + self.assertFalse(languageHandler.isNormalizedWin32Locale("de_CH")) + self.assertFalse(languageHandler.isNormalizedWin32Locale("ru_RU")) + + def test_localeNormalizationForWin32(self): + self.assertEqual(languageHandler.normalizeLocaleForWin32("en"), "en") + self.assertEqual(languageHandler.normalizeLocaleForWin32("en-US"), "en-US") + self.assertEqual(languageHandler.normalizeLocaleForWin32("en_US"), "en-US") + self.assertEqual(languageHandler.normalizeLocaleForWin32("de-DE_phoneb"), "de-DE_phoneb") + self.assertEqual(languageHandler.normalizeLocaleForWin32("de_DE_phoneb"), "de-DE_phoneb") + + +class Test_GetLocaleInfoEx_Wrappers(unittest.TestCase): + """Set of tests for wrappers around `GetLocaleInfoEx` from `languageHandler`""" + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self._ansiCP = 
str(ctypes.windll.kernel32.GetACP()) + + def test_ValidEnglishLangNamesAreReturned(self): + """Smoke tests `languageHandler.englishLanguageNameFromNVDALocale` with some known locale names""" + self.assertEqual(languageHandler.englishLanguageNameFromNVDALocale("en"), "English") + self.assertEqual(languageHandler.englishLanguageNameFromNVDALocale("de"), "German") + self.assertEqual(languageHandler.englishLanguageNameFromNVDALocale("ne"), "Nepali") + self.assertEqual(languageHandler.englishLanguageNameFromNVDALocale("pt-BR"), "Portuguese") + self.assertEqual(languageHandler.englishLanguageNameFromNVDALocale("de_CH"), "German") + + def test_ValidEnglishCountryNamesAreReturned(self): + """Smoke tests `languageHandler.englishCountryNameFromNVDALocale` with some known locale names""" + self.assertEqual(languageHandler.englishCountryNameFromNVDALocale("en"), "United States") + self.assertEqual(languageHandler.englishCountryNameFromNVDALocale("de"), "Germany") + self.assertEqual(languageHandler.englishCountryNameFromNVDALocale("ne"), "Nepal") + self.assertEqual(languageHandler.englishCountryNameFromNVDALocale("pt-BR"), "Brazil") + self.assertEqual(languageHandler.englishCountryNameFromNVDALocale("pt-PT"), "Portugal") + self.assertEqual(languageHandler.englishCountryNameFromNVDALocale("de_CH"), "Switzerland") + + def test_validAnsiCodePagesAreReturned(self): + """Smoke tests `languageHandler.ansiCodePageFromNVDALocale` with some known + not Unicode only locale names""" + self.assertEqual(languageHandler.ansiCodePageFromNVDALocale("en"), "1252") + self.assertEqual(languageHandler.ansiCodePageFromNVDALocale("pl_PL"), "1250") + self.assertEqual(languageHandler.ansiCodePageFromNVDALocale("ja_JP"), "932") + self.assertEqual(languageHandler.ansiCodePageFromNVDALocale("de-CH"), "1252") + + def test_validAnsiCodePagesAreReturnedUnicodeOnlyLocales(self): + """Smoke tests `languageHandler.ansiCodePageFromNVDALocale` with some known + Unicode only locale names""" + 
self.assertEqual(languageHandler.ansiCodePageFromNVDALocale("hi"), self._ansiCP) + self.assertEqual(languageHandler.ansiCodePageFromNVDALocale("Ne"), self._ansiCP) + + class Test_languageHandler_setLocale(unittest.TestCase): """Tests for the function languageHandler.setLocale""" From 69f8943f4dcc0819c13d989b36bb99ef18b8b87a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Golonka?= Date: Tue, 17 Aug 2021 20:21:07 +0200 Subject: [PATCH 02/13] When setting Python's locale don't rely on `locale.normalize` as it is wrong on Windows rather construct locale string using Win32 functions --- source/languageHandler.py | 81 ++++++++++++++------------------------- 1 file changed, 28 insertions(+), 53 deletions(-) diff --git a/source/languageHandler.py b/source/languageHandler.py index 1cfe340980a..75ea3903888 100644 --- a/source/languageHandler.py +++ b/source/languageHandler.py @@ -277,72 +277,47 @@ def setLanguage(lang: str) -> None: # Install our pgettext function. builtins.pgettext = makePgettext(trans) + +def localeStringFromLocaleCode(localeCode: str) -> str: + normalizedLocaleCode = normalizeLocaleForWin32(localeCode) + langName = englishLanguageNameFromNVDALocale(normalizedLocaleCode) + if langName is None: + raise ValueError(f"Locale code {localeCode} not supported by Windows") + countryName = englishCountryNameFromNVDALocale(normalizedLocaleCode) + codePage = ansiCodePageFromNVDALocale(normalizedLocaleCode) + return f"{langName}_{countryName}.{codePage}" + + def setLocale(localeName: str) -> None: ''' Set python's locale using a `localeName` such as "en", "ru_RU", or "es-ES". Will fallback on `curLang` if it cannot be set and finally fallback to the system locale. 
''' - - r''' - Python 3.8's locale system allows you to set locales that you cannot get - so we must test for both ValueErrors and locale.Errors - - >>> import locale - >>> locale.setlocale(locale.LC_ALL, 'foobar') - Traceback (most recent call last): - File "", line 1, in - File "Python38-32\lib\locale.py", line 608, in setlocale - return _setlocale(category, locale) - locale.Error: unsupported locale setting - >>> locale.setlocale(locale.LC_ALL, 'en-GB') - 'en-GB' - >>> locale.getlocale() - Traceback (most recent call last): - File "", line 1, in - File "Python38-32\lib\locale.py", line 591, in getlocale - return _parse_localename(localename) - File "Python38-32\lib\locale.py", line 499, in _parse_localename - raise ValueError('unknown locale: %s' % localename) - ValueError: unknown locale: en-GB - ''' originalLocaleName = localeName - # Try setting Python's locale to localeName + localeString = "" try: - locale.setlocale(locale.LC_ALL, localeName) - locale.getlocale() - log.debug(f"set python locale to {localeName}") - return - except locale.Error: - log.debugWarning(f"python locale {localeName} could not be set") + localeString = localeStringFromLocaleCode(localeName) + log.debug(f"Win32 locale string from locale code is {localeString}") except ValueError: - log.debugWarning(f"python locale {localeName} could not be retrieved with getlocale") - - if '-' in localeName: - # Python couldn't support the language-country locale, try language_country. - try: - localeName = localeName.replace('-', '_') - locale.setlocale(locale.LC_ALL, localeName) - locale.getlocale() - log.debug(f"set python locale to {localeName}") - return - except locale.Error: - log.debugWarning(f"python locale {localeName} could not be set") - except ValueError: - log.debugWarning(f"python locale {localeName} could not be retrieved with getlocale") - - if '_' in localeName: - # Python couldn't support the language_country locale, just try language. 
+ log.debugWarning(f"Locale {localeName} not supported by Windows") + # Try just with a language name + if "-" in localeName: + localeName = localeName.split("-")[0] + try: + localeString = localeStringFromLocaleCode(localeName) + log.debug(f"Win32 locale string from locale code is {localeString}") + except ValueError: + log.debugWarning(f"Locale {localeName} not supported by Windows") + if localeString: try: - localeName = localeName.split('_')[0] - locale.setlocale(locale.LC_ALL, localeName) + locale.setlocale(locale.LC_ALL, localeString) locale.getlocale() - log.debug(f"set python locale to {localeName}") + log.debug(f"set python locale to {localeString}") return except locale.Error: - log.debugWarning(f"python locale {localeName} could not be set") + log.debugWarning(f"python locale {localeString} could not be set") except ValueError: - log.debugWarning(f"python locale {localeName} could not be retrieved with getlocale") - + log.debugWarning(f"python locale {localeString} could not be retrieved with getlocale") try: locale.getlocale() except ValueError: From d2491a4a150e83c950b5e13dab3c4e4a5cb1b224 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Golonka?= Date: Tue, 17 Aug 2021 20:24:05 +0200 Subject: [PATCH 03/13] Remove workaround for issue #12160 (PR #12250), as it is no longer needed since we're now setting Python's locale to a correct Win32 one. 
--- source/logHandler.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/source/logHandler.py b/source/logHandler.py index 56f4ed98e7d..049432a83e3 100755 --- a/source/logHandler.py +++ b/source/logHandler.py @@ -288,18 +288,6 @@ class Formatter(logging.Formatter): def formatException(self, ex): return stripBasePathFromTracebackText(super(Formatter, self).formatException(ex)) - def formatTime(self, record: logging.LogRecord, datefmt: Optional[str] = None) -> str: - """Custom implementation of `formatTime` which avoids `time.localtime` - since it causes a crash under some versions of Universal CRT ( #12160, Python issue 36792) - """ - timeAsFileTime = winKernel.time_tToFileTime(record.created) - timeAsSystemTime = winKernel.SYSTEMTIME() - winKernel.FileTimeToSystemTime(timeAsFileTime, timeAsSystemTime) - timeAsLocalTime = winKernel.SYSTEMTIME() - winKernel.SystemTimeToTzSpecificLocalTime(None, timeAsSystemTime, timeAsLocalTime) - res = f"{timeAsLocalTime.wHour:02d}:{timeAsLocalTime.wMinute:02d}:{timeAsLocalTime.wSecond:02d}" - return self.default_msec_format % (res, record.msecs) - class StreamRedirector(object): """Redirects an output stream to a logger. From 4df7063a64ef33a7011821c487f065abdb3f5525 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Golonka?= Date: Wed, 18 Aug 2021 22:12:30 +0200 Subject: [PATCH 04/13] Expand / fix unit tests --- source/languageHandler.py | 95 ++++++++++++++----- source/localesData.py | 29 ++++++ tests/unit/test_languageHandler.py | 143 ++++++++++++++++++++--------- 3 files changed, 202 insertions(+), 65 deletions(-) create mode 100644 source/localesData.py diff --git a/source/languageHandler.py b/source/languageHandler.py index 75ea3903888..478fcaab36b 100644 --- a/source/languageHandler.py +++ b/source/languageHandler.py @@ -40,6 +40,8 @@ def isNormalizedWin32Locale(localeName: str) -> bool: + """Checks if the given locale is in a form which can be used by Win32 locale functions such as + `GetLocaleInfoEx`. 
See `normalizeLocaleForWin32` for more comments.""" hyphensCount = localeName.count("-") underscoresCount = localeName.count("_") if not hyphensCount and not underscoresCount: @@ -50,6 +52,16 @@ def isNormalizedWin32Locale(localeName: str) -> bool: def normalizeLocaleForWin32(localeName: str) -> str: + """Converts given locale to a form which can be used by Win32 locale functions such as + `GetLocaleInfoEx` unless locale is normalized already. + Uses hyphen as a language/country separator taking care not to replace underscores used + as a separator between country name and alternate order specifiers. + For example locales using alternate sorts see: + https://docs.microsoft.com/en-us/openspecs/windows_protocols/ms-lcid/e6a54e86-9660-44fa-a005-d00da97722f2 + While NVDA does not support locales requiring multiple sorting orders users may still have their Windows + set to such locale and if all underscores were replaced unconditionally + we would be unable to generate Python locale from their default UI language. + """ if not isNormalizedWin32Locale(localeName): localeName = localeName.replace('_', '-', 1) return localeName @@ -103,18 +115,10 @@ def getLanguageDescription(language): desc=buf.value if not desc: #Some hard-coded descriptions where we know the language fails on various configurations. - desc={ - # Translators: The name of a language supported by NVDA. - "an":pgettext("languageName","Aragonese"), - # Translators: The name of a language supported by NVDA. - "ckb":pgettext("languageName","Central Kurdish"), - # Translators: The name of a language supported by NVDA. - "kmr":pgettext("languageName","Northern Kurdish"), - # Translators: The name of a language supported by NVDA. - "my":pgettext("languageName","Burmese"), - # Translators: The name of a language supported by NVDA. 
- "so":pgettext("languageName","Somali"), - }.get(language,None) + # Imported lazily since language descriptions are translatable + # and `languageHandler` is responsible for setting the translation. + import localesData + desc = localesData.LANG_NAMES_TO_LOCALIZED_DESCS.get(language, None) return desc @@ -126,7 +130,26 @@ def englishLanguageNameFromNVDALocale(localeName: str) -> Optional[str]: if buffLength: buf = ctypes.create_unicode_buffer(buffLength) winKernel.kernel32.GetLocaleInfoEx(localeName, LOCALE_SENGLISHLANGUAGENAME, buf, buffLength) - return buf.value + langName = buf.value + if "Unknown" in langName: + return None + try: + langName.encode("ascii") + return langName + except UnicodeEncodeError: + # The language name cannot be encoded in ASCII which unfortunately means we won't be able + # to set Python's locale to it (Python issue 26024). + # This has been observed for Norwegian + # (language name as returned from Windows is 'Norwegian Bokmål'). + # Thankfully keeping just the ASCII part of the string yields the desired result. + partsList = [] + for part in langName.split(): + try: + part.encode("ascii") + partsList.append(part) + except UnicodeEncodeError: + continue + return " ".join(partsList) return None @@ -138,7 +161,12 @@ def englishCountryNameFromNVDALocale(localeName: str) -> Optional[str]: if buffLength: buf = ctypes.create_unicode_buffer(buffLength) winKernel.kernel32.GetLocaleInfoEx(localeName, LOCALE_SENGLISHCOUNTRYNAME, buf, buffLength) - return buf.value + if "Unknown" in buf.value: + return None + # Country name can contain dots such as 'Hong Kong S.A.R.'. + # Python's `setlocale` cannot deal with that. + # Removing dots works though. 
+ return buf.value.replace(".", "") return None @@ -146,12 +174,16 @@ def ansiCodePageFromNVDALocale(localeName: str) -> Optional[str]: """Returns either English name of the given country using GetLocaleInfoEx or None if the given locale is not known to Windows.""" localeName = normalizeLocaleForWin32(localeName) + if not englishCountryNameFromNVDALocale(localeName): + return None buffLength = winKernel.kernel32.GetLocaleInfoEx(localeName, LOCALE_IDEFAULTANSICODEPAGE, None, 0) if buffLength: buf = ctypes.create_unicode_buffer(buffLength) winKernel.kernel32.GetLocaleInfoEx(localeName, LOCALE_IDEFAULTANSICODEPAGE, buf, buffLength) - codePage = buf.value + codePage = buf.value if codePage == CP_ACP: + # Some locales such as Hindi are Unicode only i.e. they don't have specific ANSI code page. + # In such case code page should be set to the default ANSI code page of the system. codePage = str(winKernel.kernel32.GetACP()) return codePage return None @@ -300,9 +332,21 @@ def setLocale(localeName: str) -> None: log.debug(f"Win32 locale string from locale code is {localeString}") except ValueError: log.debugWarning(f"Locale {localeName} not supported by Windows") - # Try just with a language name - if "-" in localeName: - localeName = localeName.split("-")[0] + if localeString: + try: + locale.setlocale(locale.LC_ALL, localeString) + locale.getlocale() + log.debug(f"set python locale to {localeString}") + return + except locale.Error: + log.debugWarning(f"python locale {localeString} could not be set") + except ValueError: + log.debugWarning(f"python locale {localeString} could not be retrieved with getlocale") + # The full form langName_country either cannot be retrieved from Windows + # or Python cannot be set to that locale. + # Try just with the language name. 
+ if "_" in localeName: + localeName = localeName.split("_")[0] try: localeString = localeStringFromLocaleCode(localeName) log.debug(f"Win32 locale string from locale code is {localeString}") @@ -318,9 +362,18 @@ def setLocale(localeName: str) -> None: log.debugWarning(f"python locale {localeString} could not be set") except ValueError: log.debugWarning(f"python locale {localeString} could not be retrieved with getlocale") - try: - locale.getlocale() - except ValueError: + localeFromLang = englishLanguageNameFromNVDALocale(localeName) + if localeFromLang: + try: + locale.setlocale(locale.LC_ALL, localeFromLang) + locale.getlocale() + log.debug(f"set python locale to {localeFromLang}") + return + except locale.Error: + log.debugWarning(f"python locale {localeFromLang} could not be set") + except ValueError: + log.debugWarning(f"python locale {localeFromLang} could not be retrieved with getlocale") + if not localeString: # as the locale may have been changed to something that getlocale() couldn't retrieve # reset to default locale if originalLocaleName == curLang: diff --git a/source/localesData.py b/source/localesData.py new file mode 100644 index 00000000000..e54a665a06c --- /dev/null +++ b/source/localesData.py @@ -0,0 +1,29 @@ +# A part of NonVisual Desktop Access (NVDA) +# Copyright (C) 2012-2021 NV Access Limited, Joseph Lee, Łukasz Golonka +# This file may be used under the terms of the GNU General Public License, version 2 or later. +# For more details see: https://www.gnu.org/licenses/gpl-2.0.html + + +"""Contains information about various languages supported by NVDA. +As there are localizable strings at module level, +this can only be imported once localization is set up via `languageHandler.initialize`. +""" + + +from typing import Dict + + +# Maps names of languages supported by NVDA to their translated names +# for langs for which Windows does not contain a translated description. 
+LANG_NAMES_TO_LOCALIZED_DESCS: Dict[str, str] = { + # Translators: The name of a language supported by NVDA. + "an": pgettext("languageName", "Aragonese"), + # Translators: The name of a language supported by NVDA. + "ckb": pgettext("languageName", "Central Kurdish"), + # Translators: The name of a language supported by NVDA. + "kmr": pgettext("languageName", "Northern Kurdish"), + # Translators: The name of a language supported by NVDA. + "my": pgettext("languageName", "Burmese"), + # Translators: The name of a language supported by NVDA. + "so": pgettext("languageName", "Somali"), +} diff --git a/tests/unit/test_languageHandler.py b/tests/unit/test_languageHandler.py index c387d843189..c41eb27d5e3 100644 --- a/tests/unit/test_languageHandler.py +++ b/tests/unit/test_languageHandler.py @@ -9,19 +9,25 @@ import unittest import languageHandler from languageHandler import LCID_NONE, windowsPrimaryLCIDsToLocaleNames +from localesData import LANG_NAMES_TO_LOCALIZED_DESCS import locale import ctypes + +def generateUnsupportedWindowsLocales(): + """Generates list of languages which are not supported under the current version of Windows. + Uses `localesData.LANG_NAMES_TO_LOCALIZED_DESCS` as a base but filters further + since unsupported languages are different under different systems.""" + unsupportedLangs = set() + for localeName in LANG_NAMES_TO_LOCALIZED_DESCS.keys(): + # `languageHandler.englishCountryNameFromNVDALocale` returns `None` for locale unknown to Windows. 
+ if not languageHandler.englishCountryNameFromNVDALocale(localeName): + unsupportedLangs.add(localeName) + return unsupportedLangs + + LCID_ENGLISH_US = 0x0409 -UNSUPPORTED_PYTHON_LOCALES = { - "an", - "ckb", - "kmr", - "mn", - "my", - "ne", - "so", -} +UNSUPPORTED_WIN_LANGUAGES = generateUnsupportedWindowsLocales() TRANSLATABLE_LANGS = set(l[0] for l in languageHandler.getAvailableLanguages()) - {"Windows"} WINDOWS_LANGS = set(locale.windows_locale.values()).union(windowsPrimaryLCIDsToLocaleNames.values()) @@ -73,9 +79,7 @@ def test_localeNormalizationForWin32(self): class Test_GetLocaleInfoEx_Wrappers(unittest.TestCase): """Set of tests for wrappers around `GetLocaleInfoEx` from `languageHandler`""" - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self._ansiCP = str(ctypes.windll.kernel32.GetACP()) + POSSIBLE_CODE_PAGES_FOR_UNICODE_ONLY_LOCALES = {str(ctypes.windll.kernel32.GetACP()), "65001"} def test_ValidEnglishLangNamesAreReturned(self): """Smoke tests `languageHandler.englishLanguageNameFromNVDALocale` with some known locale names""" @@ -85,6 +89,18 @@ def test_ValidEnglishLangNamesAreReturned(self): self.assertEqual(languageHandler.englishLanguageNameFromNVDALocale("pt-BR"), "Portuguese") self.assertEqual(languageHandler.englishLanguageNameFromNVDALocale("de_CH"), "German") + def test_noLangNameFromUnknownLocale(self): + """Smoke tests `languageHandler.englishLanguageNameFromNVDALocale` + with locale names unknown to Windows""" + self.assertIsNone(languageHandler.englishLanguageNameFromNVDALocale("an")) + self.assertIsNone(languageHandler.englishLanguageNameFromNVDALocale("kmr")) + + def test_englishLanguageNameFromNVDALocaleNonASCIILangNames(self): + """Ensures that `languageHandler.englishLanguageNameFromNVDALocale` + can deal with non ASCII language names returned from Windows.""" + self.assertEqual(languageHandler.englishLanguageNameFromNVDALocale("nb"), "Norwegian") + 
self.assertEqual(languageHandler.englishLanguageNameFromNVDALocale("nb_NO"), "Norwegian") + def test_ValidEnglishCountryNamesAreReturned(self): """Smoke tests `languageHandler.englishCountryNameFromNVDALocale` with some known locale names""" self.assertEqual(languageHandler.englishCountryNameFromNVDALocale("en"), "United States") @@ -94,25 +110,52 @@ def test_ValidEnglishCountryNamesAreReturned(self): self.assertEqual(languageHandler.englishCountryNameFromNVDALocale("pt-PT"), "Portugal") self.assertEqual(languageHandler.englishCountryNameFromNVDALocale("de_CH"), "Switzerland") + def test_noCountryNameFromUnknownLocale(self): + """Smoke tests `languageHandler.englishCountryNameFromNVDALocale` + with locale names unknown to Windows""" + self.assertIsNone(languageHandler.englishCountryNameFromNVDALocale("an")) + self.assertIsNone(languageHandler.englishCountryNameFromNVDALocale("kmr")) + + def test_englishCountryNameFromNVDALocaleLocaleWithDot(self): + """Ensures that `languageHandler.englishCountryNameFromNVDALocale` removes all dots + from the affected country names.""" + self.assertEqual(languageHandler.englishCountryNameFromNVDALocale("zh_HK"), "Hong Kong SAR") + def test_validAnsiCodePagesAreReturned(self): - """Smoke tests `languageHandler.ansiCodePageFromNVDALocale` with some known + """Smoke tests `languageHandler.ansiCodePageFromNVDALocale` with some known not Unicode only locale names""" self.assertEqual(languageHandler.ansiCodePageFromNVDALocale("en"), "1252") self.assertEqual(languageHandler.ansiCodePageFromNVDALocale("pl_PL"), "1250") self.assertEqual(languageHandler.ansiCodePageFromNVDALocale("ja_JP"), "932") self.assertEqual(languageHandler.ansiCodePageFromNVDALocale("de-CH"), "1252") + def test_noCodePageFromUnknownLocale(self): + """Smoke tests `languageHandler.ansiCodePageFromNVDALocale` + with locale names unknown to Windows""" + self.assertIsNone(languageHandler.ansiCodePageFromNVDALocale("an")) + 
self.assertIsNone(languageHandler.ansiCodePageFromNVDALocale("kmr")) + def test_validAnsiCodePagesAreReturnedUnicodeOnlyLocales(self): """Smoke tests `languageHandler.ansiCodePageFromNVDALocale` with some known Unicode only locale names""" - self.assertEqual(languageHandler.ansiCodePageFromNVDALocale("hi"), self._ansiCP) - self.assertEqual(languageHandler.ansiCodePageFromNVDALocale("Ne"), self._ansiCP) + self.assertIn( + languageHandler.ansiCodePageFromNVDALocale("hi"), + self.POSSIBLE_CODE_PAGES_FOR_UNICODE_ONLY_LOCALES + ) + self.assertIn( + languageHandler.ansiCodePageFromNVDALocale("Ne"), + self.POSSIBLE_CODE_PAGES_FOR_UNICODE_ONLY_LOCALES + ) class Test_languageHandler_setLocale(unittest.TestCase): """Tests for the function languageHandler.setLocale""" - SUPPORTED_LOCALES = [("en", "en_US"), ("fa-IR", "fa_IR"), ("an-ES", "an_ES")] + SUPPORTED_LOCALES = [ + ("en", 'English_United States.1252'), + ("fa-IR", "Persian_Iran.1256"), + ("pl_PL", "Polish_Poland.1250") + ] def setUp(self): """ @@ -136,39 +179,44 @@ def test_SupportedLocale_LocaleIsSet(self): for localeName in self.SUPPORTED_LOCALES: with self.subTest(localeName=localeName): languageHandler.setLocale(localeName[0]) - self.assertEqual(locale.getlocale()[0], localeName[1]) + self.assertEqual(locale.setlocale(locale.LC_ALL), localeName[1]) def test_PythonUnsupportedLocale_LocaleUnchanged(self): """ Tests several locale formats that python doesn't support which will result in a return to the current locale """ - original_locale = locale.getlocale() - for localeName in UNSUPPORTED_PYTHON_LOCALES: + original_locale = locale.setlocale(locale.LC_ALL) + for localeName in UNSUPPORTED_WIN_LANGUAGES: with self.subTest(localeName=localeName): languageHandler.setLocale(localeName) - self.assertEqual(locale.getlocale(), original_locale) + self.assertEqual(locale.setlocale(locale.LC_ALL), original_locale) def test_NVDASupportedAndPythonSupportedLocale_LanguageCodeMatches(self): """ Tests all the translatable languages 
that NVDA shows in the user preferences excludes the locales that python doesn't support, as the expected behaviour is different. """ - for localeName in TRANSLATABLE_LANGS - UNSUPPORTED_PYTHON_LOCALES: + for localeName in TRANSLATABLE_LANGS - UNSUPPORTED_WIN_LANGUAGES: with self.subTest(localeName=localeName): languageHandler.setLocale(localeName) - current_locale = locale.getlocale() - - if localeName == "uk": - self.assertEqual(current_locale[0], "English_United Kingdom") - else: - pythonLang = current_locale[0].split("_")[0] - langOnly = localeName.split("_")[0] - self.assertEqual( - langOnly, - pythonLang, - f"full values: {localeName} {current_locale[0]}", - ) + current_locale = locale.setlocale(locale.LC_ALL) + # check that the language codes are correctly set for python + # They can be set to the exact locale that was requested, or to the locale gotten + # from the language name if language_country cannot be set. + lang_country = languageHandler.localeStringFromLocaleCode(localeName) + possibleVariants = {lang_country} + if "65001" in lang_country: + # Python replaces Unicode Windows code page with 'utf8' + possibleVariants.add(lang_country.replace("65001", "utf8")) + if "_" in lang_country: + possibleVariants.add(languageHandler.localeStringFromLocaleCode(localeName.split("_")[0])) + possibleVariants.add(languageHandler.englishLanguageNameFromNVDALocale(localeName)) + self.assertIn( + current_locale, + possibleVariants, + f"full values: {localeName} {current_locale}", + ) def test_WindowsLang_LocaleCanBeRetrieved(self): """ @@ -184,8 +232,6 @@ def test_WindowsLang_LocaleCanBeRetrieved(self): class Test_LanguageHandler_SetLanguage(unittest.TestCase): """Tests for the function languageHandler.setLanguage""" - UNSUPPORTED_WIN_LANGUAGES = ["an", "kmr"] - def tearDown(self): """ Resets the language to whatever it was before the testing suite begun. 
@@ -202,7 +248,7 @@ def __init__(self, *args, **kwargs): ) locale.setlocale(locale.LC_ALL, "") - self._defaultPythonLocale = locale.getlocale() + self._defaultPythonLocale = locale.setlocale(locale.LC_ALL) languageHandler.setLanguage(self._prevLang) super().__init__(*args, **kwargs) @@ -225,7 +271,7 @@ def test_NVDASupportedLanguages_LanguageIsSetCorrectly(self): threadLocale = ctypes.windll.kernel32.GetThreadLocale() threadLocaleName = languageHandler.windowsLCIDToLocaleName(threadLocale) threadLocaleLang = threadLocaleName.split("_")[0] - if localeName in self.UNSUPPORTED_WIN_LANGUAGES: + if localeName in UNSUPPORTED_WIN_LANGUAGES: # our translatable locale isn't supported by windows # check that the system locale is unchanged self.assertEqual(self._defaultThreadLocaleName, threadLocaleName) @@ -238,18 +284,27 @@ def test_NVDASupportedLanguages_LanguageIsSetCorrectly(self): ) # check that the python locale is set - python_locale = locale.getlocale() - if localeName in UNSUPPORTED_PYTHON_LOCALES: + python_locale = locale.setlocale(locale.LC_ALL) + if localeName in UNSUPPORTED_WIN_LANGUAGES: # our translatable locale isn't supported by python # check that the system locale is unchanged self.assertEqual(self._defaultPythonLocale, python_locale) - elif localeName == "uk": - self.assertEqual(python_locale[0], "English_United Kingdom") else: # check that the language codes are correctly set for python - pythonLang = python_locale[0].split("_")[0] - self.assertEqual( - langOnly, pythonLang, f"full values: {localeName} {python_locale}" + # They can be set to the exact locale that was requested, or to the locale gotten + # from the language name if language_country cannot be set. 
+ lang_country = languageHandler.localeStringFromLocaleCode(localeName) + possibleVariants = {lang_country} + if "65001" in lang_country: + # Python replaces Unicode Windows code page with 'utf8' + possibleVariants.add(lang_country.replace("65001", "utf8")) + if "_" in lang_country: + possibleVariants.add(languageHandler.localeStringFromLocaleCode(localeName.split("_")[0])) + possibleVariants.add(languageHandler.englishLanguageNameFromNVDALocale(localeName)) + self.assertIn( + locale.setlocale(locale.LC_ALL), + possibleVariants, + f"full values: {localeName} {python_locale}" ) def test_WindowsLanguages_NoErrorsThrown(self): From 4109c41f1bc3307ced93904a069d77eaed5a8559 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Golonka?= Date: Thu, 19 Aug 2021 19:01:15 +0200 Subject: [PATCH 05/13] Improved implementation of `InitLocale` for our instance of `wx.App` --- source/core.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/source/core.py b/source/core.py index 8172bde8697..4703b914a6a 100644 --- a/source/core.py +++ b/source/core.py @@ -467,11 +467,16 @@ def OnAssert(self,file,line,cond,msg): log.debugWarning(message,codepath="WX Widgets",stack_info=True) def InitLocale(self): - # Backport of `InitLocale` from wx Python 4.1.2 as the current version tries to set a Python - # locale to an nonexistent one when creating an instance of `wx.App`. - # This causes a crash when running under a particular version of Universal CRT (#12160) - import locale - locale.setlocale(locale.LC_ALL, "C") + """Custom implementation of `InitLocale` which ensures that wxPython does not change the locale. + The current wx implementation (as of wxPython 4.1.1) sets Python locale to an invalid one + which triggers Python issue 36792 (#12160). + The new implementation (wxPython 4.1.2) sets locale to "C" (basic Unicode locale). 
+ While this is not wrong as such, NVDA manages the locale itself using `languageHandler` + and it is better to remove wx from the equation so this method is a No-op. + This code may need to be revisited when we update Python / wxPython. + """ + pass + app = App(redirect=False) # We support queryEndSession events, but in general don't do anything for them. From 7fd9cfd9844ff61507d7431a007811538178421a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Golonka?= Date: Thu, 19 Aug 2021 19:18:51 +0200 Subject: [PATCH 06/13] Improve comments - add some docstrings --- source/languageHandler.py | 36 +++++++++++++++++++++++++++--- source/localesData.py | 4 ++-- tests/unit/test_languageHandler.py | 19 +++++++++++----- 3 files changed, 48 insertions(+), 11 deletions(-) diff --git a/source/languageHandler.py b/source/languageHandler.py index 478fcaab36b..ab56940ef8d 100644 --- a/source/languageHandler.py +++ b/source/languageHandler.py @@ -132,6 +132,8 @@ def englishLanguageNameFromNVDALocale(localeName: str) -> Optional[str]: winKernel.kernel32.GetLocaleInfoEx(localeName, LOCALE_SENGLISHLANGUAGENAME, buf, buffLength) langName = buf.value if "Unknown" in langName: + # Windows 10 returns 'Unknown' for locales not known to Windows + # even though documentation states that in case of an unknown locale 0 is returned. return None try: langName.encode("ascii") @@ -162,6 +164,8 @@ def englishCountryNameFromNVDALocale(localeName: str) -> Optional[str]: buf = ctypes.create_unicode_buffer(buffLength) winKernel.kernel32.GetLocaleInfoEx(localeName, LOCALE_SENGLISHCOUNTRYNAME, buf, buffLength) if "Unknown" in buf.value: + # Windows 10 returns 'Unknown region' for locales not known to Windows + # even though documentation states that in case of an unknown locale 0 is returned. return None # Country name can contain dots such as 'Hong Kong S.A.R.'. # Python's `setlocale` cannot deal with that.
@@ -171,9 +175,15 @@ def englishCountryNameFromNVDALocale(localeName: str) -> Optional[str]: def ansiCodePageFromNVDALocale(localeName: str) -> Optional[str]: - """Returns either English name of the given country using GetLocaleInfoEx or None + """Returns either ANSI code page for a given locale using GetLocaleInfoEx or None if the given locale is not known to Windows.""" localeName = normalizeLocaleForWin32(localeName) + # Windows 10 returns English code page (1252) for locales not known to Windows + # even though documentation states that in case of an unknown locale 0 is returned. + # This means that it is impossible to differentiate locales that are unknown + # and locales using 1252 as ANSI code page. + # Use `englishCountryNameFromNVDALocale` to determine if the given locale is supported or not + # before attempting to retrieve code page. if not englishCountryNameFromNVDALocale(localeName): return None buffLength = winKernel.kernel32.GetLocaleInfoEx(localeName, LOCALE_IDEFAULTANSICODEPAGE, None, 0) @@ -311,6 +321,13 @@ def setLanguage(lang: str) -> None: def localeStringFromLocaleCode(localeCode: str) -> str: + """Given an NVDA locale such as 'en' or a Windows locale such as 'pl_PL' + creates a locale representation in a standard form for Win32 + which can be safely passed to Python's `setlocale`. + The required format is: + 'englishLanguageName_englishCountryName.localeANSICodePage' + Raises exception if the given locale is not known to Windows. + """ normalizedLocaleCode = normalizeLocaleForWin32(localeCode) langName = englishLanguageNameFromNVDALocale(normalizedLocaleCode) if langName is None: @@ -324,9 +341,15 @@ def setLocale(localeName: str) -> None: ''' Set python's locale using a `localeName` such as "en", "ru_RU", or "es-ES". Will fallback on `curLang` if it cannot be set and finally fallback to the system locale.
+ Passing NVDA locales straight to python `locale.setlocale` does not work since it tries to normalize the + parameter using `locale.normalize` which results in locales unknown to Windows (Python issue 37945). + For example executing: `locale.setlocale(locale.LC_ALL, "pl")` + results in locale being set to `('pl_PL', 'ISO8859-2')` + which is meaningless to Windows. ''' originalLocaleName = localeName localeString = "" + failedToSetLocale = False try: localeString = localeStringFromLocaleCode(localeName) log.debug(f"Win32 locale string from locale code is {localeString}") @@ -339,8 +362,10 @@ def setLocale(localeName: str) -> None: log.debug(f"set python locale to {localeString}") return except locale.Error: + failedToSetLocale = True log.debugWarning(f"python locale {localeString} could not be set") except ValueError: + failedToSetLocale = True log.debugWarning(f"python locale {localeString} could not be retrieved with getlocale") # The full form langName_country either cannot be retrieved from Windows # or Python cannot be set to that locale. @@ -359,9 +384,12 @@ def setLocale(localeName: str) -> None: log.debug(f"set python locale to {localeString}") return except locale.Error: + failedToSetLocale = True log.debugWarning(f"python locale {localeString} could not be set") except ValueError: + failedToSetLocale = True log.debugWarning(f"python locale {localeString} could not be retrieved with getlocale") + # As a final fallback try setting locale just to the English name of the given language.
localeFromLang = englishLanguageNameFromNVDALocale(localeName) if localeFromLang: try: @@ -370,11 +398,13 @@ def setLocale(localeName: str) -> None: log.debug(f"set python locale to {localeFromLang}") return except locale.Error: + failedToSetLocale = True log.debugWarning(f"python locale {localeFromLang} could not be set") except ValueError: + failedToSetLocale = True log.debugWarning(f"python locale {localeFromLang} could not be retrieved with getlocale") - if not localeString: - # as the locale may have been changed to something that getlocale() couldn't retrieve + if not localeString or failedToSetLocale: + # Either Windows does not know the locale, or Python is unable to handle it. # reset to default locale if originalLocaleName == curLang: # reset to system locale default if we can't set the current lang's locale diff --git a/source/localesData.py b/source/localesData.py index e54a665a06c..c8f69161bb8 100644 --- a/source/localesData.py +++ b/source/localesData.py @@ -4,7 +4,7 @@ # For more details see: https://www.gnu.org/licenses/gpl-2.0.html -"""Contains informations about various languages supported by NVDA. +"""Contains information about various languages supported by NVDA. As there are localizable strings at module level, this can only be imported once localization is set up via `languageHandler.initialize`. """ @@ -14,7 +14,7 @@ # Maps names of languages supported by NVDA to their translated names -# for langs for which Windows does not contain a translated description. +# for langs for which Windows does not contain a translated description. LANG_NAMES_TO_LOCALIZED_DESCS: Dict[str, str] = { # Translators: The name of a language supported by NVDA. 
"an": pgettext("languageName", "Aragonese"), diff --git a/tests/unit/test_languageHandler.py b/tests/unit/test_languageHandler.py index c41eb27d5e3..ffdc0027f2b 100644 --- a/tests/unit/test_languageHandler.py +++ b/tests/unit/test_languageHandler.py @@ -202,12 +202,13 @@ def test_NVDASupportedAndPythonSupportedLocale_LanguageCodeMatches(self): languageHandler.setLocale(localeName) current_locale = locale.setlocale(locale.LC_ALL) # check that the language codes are correctly set for python - # They can be set to the exact locale that was requested, or to the locale gotten - # from the language name if language_country cannot be set. + # They can be set to the exact locale that was requested, to the locale gotten + # from the language name if language_country cannot be set + # or just to English name of the language. lang_country = languageHandler.localeStringFromLocaleCode(localeName) possibleVariants = {lang_country} if "65001" in lang_country: - # Python replaces Unicode Windows code page with 'utf8' + # Python normalizes Unicode Windows code page to 'utf8' possibleVariants.add(lang_country.replace("65001", "utf8")) if "_" in lang_country: possibleVariants.add(languageHandler.localeStringFromLocaleCode(localeName.split("_")[0])) @@ -222,6 +223,11 @@ def test_WindowsLang_LocaleCanBeRetrieved(self): """ We don't know whether python supports a specific windows locale so just ensure locale isn't broken after testing these values. + Even though we cannot use `locale.getlocale` when checking if the correct locale has been set + in all other tests since it normalizes locale making it impossible to do comparisons + it is important that whatever is being set can be retrieved with `getlocale` + since some parts of Python standard library such as `time.strptime` relies on `getlocale` + being able to return current locale. 
""" for localeName in WINDOWS_LANGS: with self.subTest(localeName=localeName): @@ -291,12 +297,13 @@ def test_NVDASupportedLanguages_LanguageIsSetCorrectly(self): self.assertEqual(self._defaultPythonLocale, python_locale) else: # check that the language codes are correctly set for python - # They can be set to the exact locale that was requested, or to the locale gotten - # from the language name if language_country cannot be set. + # They can be set to the exact locale that was requested, to the locale gotten + # from the language name if language_country cannot be set + # or just to English name of the language. lang_country = languageHandler.localeStringFromLocaleCode(localeName) possibleVariants = {lang_country} if "65001" in lang_country: - # Python replaces Unicode Windows code page with 'utf8' + # Python normalizes Unicode Windows code page to 'utf8' possibleVariants.add(lang_country.replace("65001", "utf8")) if "_" in lang_country: possibleVariants.add(languageHandler.localeStringFromLocaleCode(localeName.split("_")[0])) From 853f765a2c7f3cec84664d5cae9a596bc2e69433 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Golonka?= Date: Fri, 20 Aug 2021 16:53:48 +0200 Subject: [PATCH 07/13] Clarify CP_ACP with a comment --- source/languageHandler.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/source/languageHandler.py b/source/languageHandler.py index ab56940ef8d..0bd15a4af62 100644 --- a/source/languageHandler.py +++ b/source/languageHandler.py @@ -28,6 +28,8 @@ LOCALE_SENGLISHCOUNTRYNAME = 0x00001002 LOCALE_IDEFAULTANSICODEPAGE = 0x00001004 +# A constant returned when asking Windows for a default code page for a given locale +# and its code page is the default code page for non Unicode programs set in Windows. CP_ACP = "0" #: Returned from L{localeNameToWindowsLCID} when the locale name cannot be mapped to a locale identifier. 
From d2b973636c0ecb84fa02bcbfa0cd7804b9e2f31c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Golonka?= Date: Fri, 20 Aug 2021 18:24:53 +0200 Subject: [PATCH 08/13] Move all NLS constants into an enum --- source/appModules/outlook.py | 2 +- source/languageHandler.py | 44 ++++++++++++++++++++++++------------ 2 files changed, 31 insertions(+), 15 deletions(-) diff --git a/source/appModules/outlook.py b/source/appModules/outlook.py index bc5910dee9f..cade39b118f 100644 --- a/source/appModules/outlook.py +++ b/source/appModules/outlook.py @@ -348,7 +348,7 @@ def _generateCategoriesText(appointment): separatorBuf = ctypes.create_unicode_buffer(bufLength) if ctypes.windll.kernel32.GetLocaleInfoW( languageHandler.LOCALE_USER_DEFAULT, - languageHandler.LOCALE_SLIST, + languageHandler.LOCALE.SLIST, separatorBuf, bufLength ) == 0: diff --git a/source/languageHandler.py b/source/languageHandler.py index 0bd15a4af62..1bbc3853fb1 100644 --- a/source/languageHandler.py +++ b/source/languageHandler.py @@ -13,20 +13,16 @@ import ctypes import locale import gettext +import enum +import buildVersion import globalVars from logHandler import log import winKernel from typing import Optional #a few Windows locale constants -LOCALE_SLANGUAGE=0x2 -LOCALE_SLIST = 0xC -LOCALE_SLANGDISPLAYNAME=0x6f LOCALE_USER_DEFAULT = 0x400 LOCALE_CUSTOM_UNSPECIFIED = 0x1000 -LOCALE_SENGLISHLANGUAGENAME = 0x00001001 -LOCALE_SENGLISHCOUNTRYNAME = 0x00001002 -LOCALE_IDEFAULTANSICODEPAGE = 0x00001004 # A constant returned when asking Windows for a default code page for a given locale # and its code page is the default code page for non Unicode programs set in Windows. 
@@ -41,6 +37,26 @@ curLang="en" +class LOCALE(enum.IntEnum): + # Represents NLS constants which can be used with `GetLocaleInfoEx` or `GetLocaleInfoW` + # Full list of these constants is available at: + # https://docs.microsoft.com/en-us/windows/win32/intl/locale-information-constants + SLANGUAGE = 0x2 + SLIST = 0xC + SLANGDISPLAYNAME = 0x6f + SENGLISHLANGUAGENAME = 0x00001001 + SENGLISHCOUNTRYNAME = 0x00001002 + IDEFAULTANSICODEPAGE = 0x00001004 + + +# These constants are deprecated and members of LOCALE enum should be used instead +# They would be removed in NVDA 2022.1 +if buildVersion.version_year < 2022: + LOCALE_SLANGUAGE = LOCALE.SLANGUAGE + LOCALE_SLIST = LOCALE.SLIST + LOCALE_SLANGDISPLAYNAME = LOCALE.SLANGDISPLAYNAME + + def isNormalizedWin32Locale(localeName: str) -> bool: """Checks if the given locale is in a form which can be used by Win32 locale functions such as `GetLocaleInfoEx`. See `normalizeLocaleForWin32` for more comments.""" @@ -109,11 +125,11 @@ def getLanguageDescription(language): buf=ctypes.create_unicode_buffer(1024) #If the original locale didn't have country info (was just language) then make sure we just get language from Windows if '_' not in language: - res=ctypes.windll.kernel32.GetLocaleInfoW(LCID,LOCALE_SLANGDISPLAYNAME,buf,1024) + res = ctypes.windll.kernel32.GetLocaleInfoW(LCID, LOCALE.SLANGDISPLAYNAME, buf, 1024) else: res=0 if res==0: - res=ctypes.windll.kernel32.GetLocaleInfoW(LCID,LOCALE_SLANGUAGE,buf,1024) + res = ctypes.windll.kernel32.GetLocaleInfoW(LCID, LOCALE.SLANGUAGE, buf, 1024) desc=buf.value if not desc: #Some hard-coded descriptions where we know the language fails on various configurations. 
@@ -128,10 +144,10 @@ def englishLanguageNameFromNVDALocale(localeName: str) -> Optional[str]: """Returns either English name of the given language using `GetLocaleInfoEx` or None if the given locale is not known to Windows.""" localeName = normalizeLocaleForWin32(localeName) - buffLength = winKernel.kernel32.GetLocaleInfoEx(localeName, LOCALE_SENGLISHLANGUAGENAME, None, 0) + buffLength = winKernel.kernel32.GetLocaleInfoEx(localeName, LOCALE.SENGLISHLANGUAGENAME, None, 0) if buffLength: buf = ctypes.create_unicode_buffer(buffLength) - winKernel.kernel32.GetLocaleInfoEx(localeName, LOCALE_SENGLISHLANGUAGENAME, buf, buffLength) + winKernel.kernel32.GetLocaleInfoEx(localeName, LOCALE.SENGLISHLANGUAGENAME, buf, buffLength) langName = buf.value if "Unknown" in langName: # Windows 10 returns 'Unknown' for locales not known to Windows @@ -161,10 +177,10 @@ def englishCountryNameFromNVDALocale(localeName: str) -> Optional[str]: """Returns either English name of the given country using GetLocaleInfoEx or None if the given locale is not known to Windows.""" localeName = normalizeLocaleForWin32(localeName) - buffLength = winKernel.kernel32.GetLocaleInfoEx(localeName, LOCALE_SENGLISHCOUNTRYNAME, None, 0) + buffLength = winKernel.kernel32.GetLocaleInfoEx(localeName, LOCALE.SENGLISHCOUNTRYNAME, None, 0) if buffLength: buf = ctypes.create_unicode_buffer(buffLength) - winKernel.kernel32.GetLocaleInfoEx(localeName, LOCALE_SENGLISHCOUNTRYNAME, buf, buffLength) + winKernel.kernel32.GetLocaleInfoEx(localeName, LOCALE.SENGLISHCOUNTRYNAME, buf, buffLength) if "Unknown" in buf.value: # Windows 10 returns 'Unknown region' for locales not known to Windows # even though documentation states that in case of an unknown locale 0 is returned. @@ -188,10 +204,10 @@ def ansiCodePageFromNVDALocale(localeName: str) -> Optional[str]: # before attempting to retrieve code page. 
if not englishCountryNameFromNVDALocale(localeName): return None - buffLength = winKernel.kernel32.GetLocaleInfoEx(localeName, LOCALE_IDEFAULTANSICODEPAGE, None, 0) + buffLength = winKernel.kernel32.GetLocaleInfoEx(localeName, LOCALE.IDEFAULTANSICODEPAGE, None, 0) if buffLength: buf = ctypes.create_unicode_buffer(buffLength) - winKernel.kernel32.GetLocaleInfoEx(localeName, LOCALE_IDEFAULTANSICODEPAGE, buf, buffLength) + winKernel.kernel32.GetLocaleInfoEx(localeName, LOCALE.IDEFAULTANSICODEPAGE, buf, buffLength) codePage = buf.value if codePage == CP_ACP: # Some locales such as Hindi are Unicode only i.e. they don't have specific ANSI code page. From 1dbc0eda4943e5fdbf0f8225c795c00c8863423b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Golonka?= Date: Fri, 20 Aug 2021 22:39:21 +0200 Subject: [PATCH 09/13] Simplify implementation of `setLocale` --- source/languageHandler.py | 81 ++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 47 deletions(-) diff --git a/source/languageHandler.py b/source/languageHandler.py index 1bbc3853fb1..ccbf381c7b7 100644 --- a/source/languageHandler.py +++ b/source/languageHandler.py @@ -355,6 +355,22 @@ def localeStringFromLocaleCode(localeCode: str) -> str: return f"{langName}_{countryName}.{codePage}" +def _setPythonLocale(localeString: str) -> bool: + """Sets Python locale to a specified one. + Returns `True` if successful, `False` if locale cannot be set or retrieved.""" + try: + locale.setlocale(locale.LC_ALL, localeString) + locale.getlocale() + log.debug(f"set python locale to {localeString}") + return True + except locale.Error: + log.debugWarning(f"python locale {localeString} could not be set") + return False + except ValueError: + log.debugWarning(f"python locale {localeString} could not be retrieved with getlocale") + return False + + def setLocale(localeName: str) -> None: ''' Set python's locale using a `localeName` such as "en", "ru_RU", or "es-ES".
@@ -367,24 +383,14 @@ def setLocale(localeName: str) -> None: ''' originalLocaleName = localeName localeString = "" - failedToSetLocale = False try: localeString = localeStringFromLocaleCode(localeName) log.debug(f"Win32 locale string from locale code is {localeString}") except ValueError: log.debugWarning(f"Locale {localeName} not supported by Windows") - if localeString: - try: - locale.setlocale(locale.LC_ALL, localeString) - locale.getlocale() - log.debug(f"set python locale to {localeString}") - return - except locale.Error: - failedToSetLocale = True - log.debugWarning(f"python locale {localeString} could not be set") - except ValueError: - failedToSetLocale = True - log.debugWarning(f"python locale {localeString} could not be retrieved with getlocale") + if localeString and _setPythonLocale(localeString): + return + else: # The full form langName_country either cannot be retrieved from Windows # or Python cannot be set to that locale. # Try just with the language name. @@ -395,43 +401,24 @@ def setLocale(localeName: str) -> None: log.debug(f"Win32 locale string from locale code is {localeString}") except ValueError: log.debugWarning(f"Locale {localeName} not supported by Windows") - if localeString: - try: - locale.setlocale(locale.LC_ALL, localeString) - locale.getlocale() - log.debug(f"set python locale to {localeString}") - return - except locale.Error: - failedToSetLocale = True - log.debugWarning(f"python locale {localeString} could not be set") - except ValueError: - failedToSetLocale = True - log.debugWarning(f"python locale {localeString} could not be retrieved with getlocale") + if localeString and _setPythonLocale(localeString): + return + else: # As a final fallback try setting locale just to the English name of the given language. 
localeFromLang = englishLanguageNameFromNVDALocale(localeName) - if localeFromLang: - try: - locale.setlocale(locale.LC_ALL, localeFromLang) - locale.getlocale() - log.debug(f"set python locale to {localeFromLang}") - return - except locale.Error: - failedToSetLocale = True - log.debugWarning(f"python locale {localeFromLang} could not be set") - except ValueError: - failedToSetLocale = True - log.debugWarning(f"python locale {localeFromLang} could not be retrieved with getlocale") - if not localeString or failedToSetLocale: - # Either Windows does not know the locale, or Python is unable to handle it. - # reset to default locale - if originalLocaleName == curLang: - # reset to system locale default if we can't set the current lang's locale - locale.setlocale(locale.LC_ALL, "") - log.debugWarning(f"set python locale to system default") + if localeFromLang and _setPythonLocale(localeFromLang): + return else: - log.debugWarning(f"setting python locale to the current language {curLang}") - # fallback and try to reset the locale to the current lang - setLocale(curLang) + # Either Windows does not know the locale, or Python is unable to handle it. + # reset to default locale + if originalLocaleName == curLang: + # reset to system locale default if we can't set the current lang's locale + locale.setlocale(locale.LC_ALL, "") + log.debugWarning(f"set python locale to system default") + else: + log.debugWarning(f"setting python locale to the current language {curLang}") + # fallback and try to reset the locale to the current lang + setLocale(curLang) def getLanguage() -> str: From 20b0c27ab9d29851eb970773fe0ac9d308b34e9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Golonka?= Date: Sat, 21 Aug 2021 13:21:14 +0200 Subject: [PATCH 10/13] Revert "Remove work aroud for issue #12160 PR #12250 as it is no longer needed since we're now setting Python's locale to a correct Win32 ones." This reverts commit d2491a4a150e83c950b5e13dab3c4e4a5cb1b224. 
--- source/logHandler.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/source/logHandler.py b/source/logHandler.py index 049432a83e3..56f4ed98e7d 100755 --- a/source/logHandler.py +++ b/source/logHandler.py @@ -288,6 +288,18 @@ class Formatter(logging.Formatter): def formatException(self, ex): return stripBasePathFromTracebackText(super(Formatter, self).formatException(ex)) + def formatTime(self, record: logging.LogRecord, datefmt: Optional[str] = None) -> str: + """Custom implementation of `formatTime` which avoids `time.localtime` + since it causes a crash under some versions of Universal CRT ( #12160, Python issue 36792) + """ + timeAsFileTime = winKernel.time_tToFileTime(record.created) + timeAsSystemTime = winKernel.SYSTEMTIME() + winKernel.FileTimeToSystemTime(timeAsFileTime, timeAsSystemTime) + timeAsLocalTime = winKernel.SYSTEMTIME() + winKernel.SystemTimeToTzSpecificLocalTime(None, timeAsSystemTime, timeAsLocalTime) + res = f"{timeAsLocalTime.wHour:02d}:{timeAsLocalTime.wMinute:02d}:{timeAsLocalTime.wSecond:02d}" + return self.default_msec_format % (res, record.msecs) + class StreamRedirector(object): """Redirects an output stream to a logger. 
From 9f123c6368c8db740f5d9232a7634019239e5068 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Golonka?= Date: Sun, 22 Aug 2021 13:28:42 +0200 Subject: [PATCH 11/13] Clarifying comments --- source/logHandler.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/source/logHandler.py b/source/logHandler.py index 56f4ed98e7d..95755317674 100755 --- a/source/logHandler.py +++ b/source/logHandler.py @@ -290,7 +290,8 @@ def formatException(self, ex): def formatTime(self, record: logging.LogRecord, datefmt: Optional[str] = None) -> str: """Custom implementation of `formatTime` which avoids `time.localtime` - since it causes a crash under some versions of Universal CRT ( #12160, Python issue 36792) + since it causes a crash under some versions of Universal CRT when Python locale + is set to a Unicode one ( #12160, Python issue 36792) """ timeAsFileTime = winKernel.time_tToFileTime(record.created) timeAsSystemTime = winKernel.SYSTEMTIME() From cee22c146015e71bc566c057271f50a5ec0f7fc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Golonka?= Date: Wed, 25 Aug 2021 11:40:58 +0200 Subject: [PATCH 12/13] Review actions --- source/languageHandler.py | 49 ++++++++++++++++++--------------------- source/logHandler.py | 2 +- 2 files changed, 24 insertions(+), 27 deletions(-) diff --git a/source/languageHandler.py b/source/languageHandler.py index ccbf381c7b7..ee8f19c2f2e 100644 --- a/source/languageHandler.py +++ b/source/languageHandler.py @@ -390,35 +390,32 @@ def setLocale(localeName: str) -> None: log.debugWarning(f"Locale {localeName} not supported by Windows") if localeString and _setPythonLocale(localeString): return - else: - # The full form langName_country either cannot be retrieved from Windows - # or Python cannot be set to that locale. - # Try just with the language name. 
- if "_" in localeName: - localeName = localeName.split("_")[0] - try: - localeString = localeStringFromLocaleCode(localeName) - log.debug(f"Win32 locale string from locale code is {localeString}") - except ValueError: - log.debugWarning(f"Locale {localeName} not supported by Windows") + # The full form langName_country either cannot be retrieved from Windows + # or Python cannot be set to that locale. + # Try just with the language name. + if "_" in localeName: + localeName = localeName.split("_")[0] + try: + localeString = localeStringFromLocaleCode(localeName) + log.debug(f"Win32 locale string from locale code is {localeString}") + except ValueError: + log.debugWarning(f"Locale {localeName} not supported by Windows") if localeString and _setPythonLocale(localeString): return + # As a final fallback try setting locale just to the English name of the given language. + localeFromLang = englishLanguageNameFromNVDALocale(localeName) + if localeFromLang and _setPythonLocale(localeFromLang): + return + # Either Windows does not know the locale, or Python is unable to handle it. + # reset to default locale + if originalLocaleName == curLang: + # reset to system locale default if we can't set the current lang's locale + locale.setlocale(locale.LC_ALL, "") + log.debugWarning(f"set python locale to system default") else: - # As a final fallback try setting locale just to the English name of the given language. - localeFromLang = englishLanguageNameFromNVDALocale(localeName) - if localeFromLang and _setPythonLocale(localeFromLang): - return - else: - # Either Windows does not know the locale, or Python is unable to handle it. 
- # reset to default locale - if originalLocaleName == curLang: - # reset to system locale default if we can't set the current lang's locale - locale.setlocale(locale.LC_ALL, "") - log.debugWarning(f"set python locale to system default") - else: - log.debugWarning(f"setting python locale to the current language {curLang}") - # fallback and try to reset the locale to the current lang - setLocale(curLang) + log.debugWarning(f"setting python locale to the current language {curLang}") + # fallback and try to reset the locale to the current lang + setLocale(curLang) def getLanguage() -> str: diff --git a/source/logHandler.py b/source/logHandler.py index 95755317674..70e163cc7c1 100755 --- a/source/logHandler.py +++ b/source/logHandler.py @@ -291,7 +291,7 @@ def formatException(self, ex): def formatTime(self, record: logging.LogRecord, datefmt: Optional[str] = None) -> str: """Custom implementation of `formatTime` which avoids `time.localtime` since it causes a crash under some versions of Universal CRT when Python locale - is set to a Unicode one ( #12160, Python issue 36792) + is set to a Unicode one (#12160, Python issue 36792) """ timeAsFileTime = winKernel.time_tToFileTime(record.created) timeAsSystemTime = winKernel.SYSTEMTIME() From 65938576255bfb5776c20615712ee8bfc120d2fe Mon Sep 17 00:00:00 2001 From: buddsean Date: Thu, 26 Aug 2021 10:13:12 +1000 Subject: [PATCH 13/13] update changes --- user_docs/en/changes.t2t | 3 +++ 1 file changed, 3 insertions(+) diff --git a/user_docs/en/changes.t2t b/user_docs/en/changes.t2t index ad9c089bc10..83828a34a4e 100644 --- a/user_docs/en/changes.t2t +++ b/user_docs/en/changes.t2t @@ -29,6 +29,7 @@ What's New in NVDA - When reading a header cell of a table in Chrome, fix the column name being announced twice. (#10840) - NVDA no longer reports a numerical value for UIA sliders that have a textual representation of their value defined. (UIA ValuePattern is now preferred over RangeValuePattern). 
(#12724) - NVDA no longer treats the value of UIA sliders as always percentage based. +- NVDA no longer sets invalid Python locales. (#12753) - @@ -39,6 +40,8 @@ To match the production build environment, update Visual Studio to keep in sync - Instead use ``apiLevel`` (see the comments at ``_UIAConstants.WinConsoleAPILevel`` for details). - - Transparency of text background color sourced from GDI applications (via the display model), is now exposed for add-ons or appModules. (#12658) +- ``LOCALE_SLANGUAGE``, ``LOCALE_SLIST`` and ``LOCALE_SLANGDISPLAYNAME`` are moved to the ``LOCALE`` enum in languageHandler. +They are still available at the module level but are deprecated and to be removed in NVDA 2022.1. (#12753) -