Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

'Pacific Island' no longer matches Iceland (IS) #34

Merged
merged 4 commits into from
Nov 6, 2024
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/)
## [5.2.3] - 2024-11-04

- "Pacific Island" no longer matches Iceland.
- "Island" no longer matches Iceland when language is not set.

## [5.2.2] - 2024-08-23

Expand Down
12 changes: 10 additions & 2 deletions geoconvert/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
DE_POSTCODE_RANGE,
NUTS_CODES_BY_COUNTRY,
all_nuts_regex,
ambiguous_countries,
mmoriniere marked this conversation as resolved.
Show resolved Hide resolved
br_postcode_regex,
br_state_code_regex,
br_state_name_regex,
Expand Down Expand Up @@ -413,8 +414,15 @@ def _full_name_to_country_code(text, lang, language_to_full_names):
# If the language is unknown, just do not use any language.
language_to_full_names = {}

for lang, full_names in language_to_full_names.items():
country_code = _full_name_to_country_code_for_lang(text, lang, full_names)
for language, full_names in language_to_full_names.items():
# When language is not set, we remove ambiguous countries names from the countries list
mmoriniere marked this conversation as resolved.
Show resolved Hide resolved
if not lang:
full_names = {
name: code
for name, code in full_names.items()
if name not in ambiguous_countries
mmoriniere marked this conversation as resolved.
Show resolved Hide resolved
}
country_code = _full_name_to_country_code_for_lang(text, language, full_names)
if country_code:
return country_code

Expand Down
1 change: 1 addition & 0 deletions geoconvert/data/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from .capitals import capitals_de, capitals_en, capitals_fr, language_to_capital_names
from .countries import (
ambiguous_countries,
countries_de,
countries_en,
countries_fr,
Expand Down
5 changes: 5 additions & 0 deletions geoconvert/data/countries.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,11 @@
"pacific island": None, # en
}

# We register here names matching common words in other languages to prevent
# matching them when language is not defined
ambiguous_countries = ["island"]


countries_fr = {
**special_countries,
**{
Expand Down
5 changes: 4 additions & 1 deletion tests/test_countries.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,10 @@ class TestCountries:
("Rhode Island", {}, "US"), # en
("Pacific island", {}, None), # en
("Fiji/Pacific island", {}, "FJ"), # en
("island", {}, None), # en
# However, in cases where island is singular instead of plural,
# there can be confusion.
("Solomon Island Nationals", {}, "IS"), # en
("Solomon Island Nationals", {}, None), # en
# Any capitalization for lang works
("Germany", {"lang": "en"}, "DE"),
("Germany", {"lang": "En"}, "DE"),
Expand Down Expand Up @@ -166,6 +167,7 @@ def test_country_name_to_country_code_fr(self, input_data, expected):
("state of palestine", "PS"),
("palestine, state of", "PS"),
("Irak", None),
("island", None),
],
)
def test_country_name_to_country_code_en(self, input_data, expected):
Expand Down Expand Up @@ -194,6 +196,7 @@ def test_country_name_to_country_code_en(self, input_data, expected):
("Dschibuti-Stadt", "DJ"),
("elfenbeinküste und ecuador ", "CI"),
("Fiji/Pacific island", None), # en
("island", "IS"),
],
)
def test_country_name_to_country_code_de(self, input_data, expected):
Expand Down
Loading