Skip to content

Commit

Permalink
Process / return a new typed Lang class in i18n methods
Browse files Browse the repository at this point in the history
  • Loading branch information
benoit74 committed Aug 2, 2024
1 parent 0573638 commit ac9249c
Show file tree
Hide file tree
Showing 3 changed files with 139 additions and 30 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- New `creator.Creator.convert_and_check_metadata` to convert metadata to bytes or str for known use cases and check proper type is passed to libzim
- Add svg2png image conversion function #113
- Add `conversion.convert_svg2png` image conversion function + support for SVG in `probing.format_for` #113
- Add `i18n.Lang` class used as typed result of i18n operations #151

### Changed

Expand All @@ -31,6 +32,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- **BREAKING** `creator.Creator.add_metadata` and `creator.Creator.validate_metadata` now only accepts `bytes | str` as value (it must have been converted before call)
- **BREAKING** second argument of `creator.Creator.add_metadata` has been renamed to `value` instead of `content` to align with other methods
- When a type issue arises in metadata checks, wrong value type is displayed in exception
- **BREAKING** `i18n.get_language_details()`, `i18n.get_iso_lang_data()`, `i18n.find_language_names()` and `i18n.update_with_macro()` now process / return a new typed `Lang` class #151
- **BREAKING** Rename `i18n.NotFound` to `i18n.NotFoundError`

### Fixed

Expand Down
106 changes: 77 additions & 29 deletions src/zimscraperlib/i18n.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,63 @@ def setlocale(root_dir: pathlib.Path, locale_name: str):
) from exc


def get_iso_lang_data(lang: str) -> tuple[dict, dict | None]:
"""ISO-639-x languages details for lang. Raises NotFound
class Lang(dict):

Included keys: iso-639-1, iso-639-2b, iso-639-2t, iso-639-3, iso-639-5
english, iso_types
@property
def iso_639_1(self) -> str | None:
"""ISO-639-1 language code"""
return self["iso-639-1"]

See get_language_details() for details"""
@property
def iso_639_2b(self) -> str | None:
"""ISO-639-2b language code"""
return self["iso-639-2b"]

@property
def iso_639_2t(self) -> str | None:
"""ISO-639-2t language code"""
return self["iso-639-2t"]

@property
def iso_639_3(self) -> str | None:
"""ISO-639-3 language code"""
return self["iso-639-3"]

@property
def iso_639_5(self) -> str | None:
"""ISO-639-5 language code"""
return self["iso-639-5"]

@property
def english(self) -> str:
"""language name in English"""
return self["english"]

@property
def native(self) -> str:
"""language name in native language"""
return self["native"]

@property
def iso_types(self) -> list[str]:
"""list of supported iso types"""
return self["iso_types"]

@property
def query(self) -> list[str]:
"""Query issued for these language details"""
return self["query"]

@property
def querytype(self) -> list[str]:
"""Type of query issued to retrieve language details"""
return self["querytype"]


def get_iso_lang_data(lang: str) -> tuple[Lang, Lang | None]:
"""ISO-639-x languages details for lang. Raises NotFoundError
Returns a tuple (main_language, macro_language | None)"""

iso_types = []

Expand Down Expand Up @@ -105,9 +155,9 @@ def replace_types(new_type: str) -> str:
if str(getattr(isolang, code_type)).lower() == lang.lower():
iso_types.append(replace_types(code_type))

lang_data = {
f"iso-639-{lang_}": getattr(isolang, f"pt{lang_}") for lang_ in ISO_LEVELS
}
lang_data = Lang(
**{f"iso-639-{lang_}": getattr(isolang, f"pt{lang_}") for lang_ in ISO_LEVELS}
)
lang_data.update({"english": isolang.name, "iso_types": iso_types})

if isolang.macro():
Expand All @@ -118,53 +168,51 @@ def replace_types(new_type: str) -> str:
return lang_data, None


def find_language_names(
query: str, lang_data: dict | None = None
) -> tuple[str | None, str | None]:
"""(native, english) language names for lang with help from language_details dict
def find_language_names(query: str, lang_data: Lang | None = None) -> tuple[str, str]:
"""(native, english) language names for lang with help from lang_data
Falls back to English name if available or query if not"""
if lang_data is None:
lang_data = get_language_details(query, failsafe=True) or {}
lang_data = get_language_details(query, failsafe=True)
if not lang_data:
return query, query

try:
query_locale = babel.Locale.parse(query)
return query_locale.get_display_name(), query_locale.get_display_name("en")
if native_display_name := query_locale.get_display_name():
if english_display_name := query_locale.get_display_name("en"):
return native_display_name, english_display_name
except (babel.UnknownLocaleError, TypeError, ValueError, AttributeError):
pass

# ISO code lookup order matters (most qualified first)!
for iso_level in [f"iso-639-{lang_}" for lang_ in reversed(ISO_LEVELS)]:
try:
query_locale = babel.Locale.parse(lang_data.get(iso_level))
return query_locale.get_display_name(), query_locale.get_display_name("en")
if native_display_name := query_locale.get_display_name():
if english_display_name := query_locale.get_display_name("en"):
return native_display_name, english_display_name
except (babel.UnknownLocaleError, TypeError, ValueError, AttributeError):
pass
default = lang_data.get("english", query)
default = lang_data.get("english") or query
return default, default


def update_with_macro(lang_data: dict, macro_data: dict | None):
def update_with_macro(lang_data: Lang, macro_data: Lang | None):
"""update empty keys from lang_data with ones of macro_data"""
if macro_data:
for key, value in macro_data.items():
if key in lang_data and not lang_data[key]:
if key in lang_data and not lang_data.get(key):
lang_data[key] = value
return lang_data


def get_language_details(query: str, *, failsafe: bool | None = False) -> dict | None:
def get_language_details(
query: str, failsafe: bool | None = False # noqa: FBT002
) -> Lang | None:
"""language details dict from query.
Raises NotFound or return `und` language details if failsafe
iso-639-1: str ISO-639-1 language code
iso-639-2b: str ISO-639-2b language code
iso-639-2t: str ISO-639-2t language code
iso-639-3: str ISO-639-3 language code
iso-639-5: str ISO-639-5 language code
english: str language name in English
native: str language name in its native language
iso_types: [str] list of supported iso types
When the query fails, either raises NotFoundError or returns None, depending on failsafe
"""

Expand Down
60 changes: 59 additions & 1 deletion tests/i18n/test_i18n.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@

import locale
import pathlib
from unittest.mock import Mock

import pytest

from zimscraperlib.i18n import (
Lang,
NotFoundError,
_,
find_language_names,
Expand Down Expand Up @@ -190,7 +192,19 @@ def test_lang_details(query, expected):
with pytest.raises(NotFoundError):
get_language_details(query)
else:
assert get_language_details(query) == expected
result = get_language_details(query)
assert result == expected
if result:
assert result.iso_639_1 == expected.get("iso-639-1")
assert result.iso_639_2b == expected.get("iso-639-2b")
assert result.iso_639_2t == expected.get("iso-639-2t")
assert result.iso_639_3 == expected.get("iso-639-3")
assert result.iso_639_5 == expected.get("iso-639-5")
assert result.english == expected.get("english")
assert result.native == expected.get("native")
assert result.iso_types == expected.get("iso_types")
assert result.query == expected.get("query")
assert result.querytype == expected.get("querytype")


@pytest.mark.parametrize(
Expand All @@ -201,6 +215,7 @@ def test_lang_details(query, expected):
("bm", ("bamanakan", "Bambara")),
("zh", ("中文", "Chinese")),
("ar", ("العربية", "Arabic")),
("qq", ("qq", "qq")),
],
)
def test_lang_name(query, expected):
Expand All @@ -214,3 +229,46 @@ def test_lang_name(query, expected):
def test_translation(lang, expected):
setlocale(pathlib.Path(__file__).parent, lang)
assert _("Hello World!") == expected


@pytest.mark.parametrize(
"dict_data",
[{}, {"iso-639-1": "ar"}],
)
def test_lang_equals(dict_data):
assert Lang(dict_data) == Lang(dict_data)
assert Lang(dict_data) == Lang({**dict_data})


@pytest.mark.parametrize(
"dict_data_left, dict_data_right",
[
({}, {"iso-639-1": "ar"}),
({"iso-639-1": "ar"}, {"iso-639-1": "ab"}),
({"iso-639-1": "ar"}, {"iso-639-2": "ar"}),
],
)
def test_lang_not_equals(dict_data_left, dict_data_right):
assert Lang(dict_data_left) != Lang(dict_data_right)
assert Lang(dict_data_left) != "foo"


@pytest.mark.parametrize(
"babel_native_return, babel_english_return, expected_native, expected_english",
[
("Native value", "English value", "Native value", "English value"),
(None, "English value", "German", "German"),
("Native value", None, "German", "German"),
],
)
def test_find_language_names(
mocker, babel_native_return, babel_english_return, expected_native, expected_english
):
mock_locale = Mock()
mock_locale.get_display_name.side_effect = lambda lang=None: (
babel_native_return if lang is None else babel_english_return
)

mocker.patch("babel.Locale.parse", return_value=mock_locale)

assert find_language_names("de") == (expected_native, expected_english)

0 comments on commit ac9249c

Please sign in to comment.