-
Notifications
You must be signed in to change notification settings - Fork 30
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
1d9be5e
commit 824bf74
Showing
8 changed files
with
128 additions
and
4 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -47,3 +47,4 @@ MANIFEST | |
|
||
# Per-project virtualenvs | ||
.virtualenv/ | ||
.python-version |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
[pytest] | ||
testpaths = tests | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
import re | ||
from typing import Pattern | ||
|
||
from num2words import num2words | ||
|
||
|
||
# Number-like tokens; the alternatives are tried left to right:
#   1. two or more digits, optionally joined by a comma and/or a no-break
#      space (e.g. "98", "1,5", "390<NBSP>000");
#   2. digits immediately followed by word characters (e.g. "2éme");
#   3. a lone digit.
NUMS_REGEX = re.compile(r"(\d+,?\u00A0?\d+)|(\d+\w+)|(\d)+")
# Runs of two or more whitespace characters.
FIND_MULTIPLE_SPACES_REG = re.compile(r"\s{2,}")
# A single character out of the listed punctuation marks and symbols.
FIND_PUNCTUATIONS_REG = re.compile(r"[/°\-,;!?.()\[\]*…—]")


def get_numbers(text):
    """Split *text* around every number-like token matched by ``NUMS_REGEX``.

    Because the pattern has three capturing groups, each match contributes
    three entries to the result: the text captured by the alternative that
    matched and ``None`` for the two that did not.  The surrounding plain
    text pieces (possibly empty strings) are kept in between.
    """
    return re.split(NUMS_REGEX, text)
|
||
|
||
def _number_to_words(token, locale, ordinal_regex=None):
    """Spell out one token in *locale*, or return it unchanged if it is not a number."""
    # Digit groups may be separated by (no-break) spaces, which int() and
    # float() reject inside a literal, so parse a whitespace-free copy.
    compact = ''.join(token.split())

    try:
        number = int(compact)
        if number >= 0:
            return num2words(number, lang=locale)
        return token
    except ValueError:
        pass

    try:
        # A comma is treated as the decimal separator.
        number = float(compact.replace(',', '.'))
        # NOTE: a value equal to zero (e.g. "0,0") is falsy and is therefore
        # left as-is; this is kept from the original behaviour.
        if number:
            return num2words(number, lang=locale)
        return token
    except ValueError:
        pass

    if ordinal_regex:
        matches = ordinal_regex.match(token)
        if matches:
            return num2words(int(matches.group(1)), ordinal=True, lang=locale)
    return token


def replace_numbers(inp: str, locale: str, ordinal_regex: Pattern = None):
    """Replace the numbers found in *inp* by their spelled-out form.

    The text is cut into pieces with ``get_numbers``; each piece is tried as
    an integer, then as a decimal number (comma or dot separator), then, when
    *ordinal_regex* is given, as an ordinal.  Pieces that are none of these
    are copied through unchanged.

    Args:
        inp: Text to process.
        locale: Language code forwarded to ``num2words`` as ``lang``.
        ordinal_regex: Optional compiled pattern matched against the start of
            a piece; its first group must contain the digits of the ordinal.

    Returns:
        The text with the recognised numbers replaced by words.
    """
    pieces = []
    for token in get_numbers(inp):
        # The split yields None for alternatives that did not take part in a
        # match and '' at the edges of the string; neither carries any text.
        if not token:
            continue
        pieces.append(_number_to_words(token, locale, ordinal_regex))

    return ''.join(pieces)
|
||
|
||
def maybe_normalize(value: str, mapping):
    """Apply a sequence of replacement rules to *value*, in order.

    Args:
        value: Text to normalize.
        mapping: Iterable of rules indexable as ``rule[0]`` (what to look
            for) and ``rule[1]`` (the replacement).  ``rule[0]`` is either a
            string, replaced literally, or a compiled regular expression,
            applied with ``sub``.

    Returns:
        The text after every rule has been applied.  A rule whose first
        element is neither a string nor a compiled pattern is reported on
        standard output and skipped.
    """
    for norm in mapping:
        target = norm[0]
        # isinstance() rather than an exact type comparison, so that str
        # subclasses are handled as plain strings instead of being rejected.
        if isinstance(target, str):
            value = value.replace(target, norm[1])
        elif isinstance(target, Pattern):
            value = target.sub(norm[1], value)
        else:
            print('UNEXPECTED', type(target), target)

    return value
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
import pytest | ||
|
||
from corporacreator import preprocessors | ||
|
||
|
||
# Each case is (locale, client_id, raw sentence, expected normalized sentence).
PREPROCESSOR_CASES = [
    ('fr', '*', 'Faisons donc attention à utiliser les bons mots.', 'faisons donc attention a utiliser les bons mots'),
    ('fr', '*', "bah 98%", "bah quatre vingt dix huit pourcent"),
    ('fr', '*', "prix au m2", "prix au metre carre"),
    ('fr', '*', "prix au m²", "prix au metre carre"),
    ('fr', '*', "10 m²", "dix metre carre"),
    ('fr', '*', "2éme page", "deuxieme page"),
    ('fr', '*', "donc, ce sera 299 € + 99 €", "donc ce sera deux cent quatre vingt dix neuf euros plus quatre vingt dix neuf euros"),
    ('fr', '*', "ok pour 18h", "ok pour dix huit heure"),
    ('fr', '*', '2 0 200', "deux zero deux cents"),
    ('fr', '*', 'rue Coq-Héron au nº13', "rue coq heron au numero treize"),
    ('fr', '*', "En comparaison, la Lune orbite en moyenne à 390 000 km de la Terre", "en comparaison la lune orbite en moyenne a trois cent quatre vingt dix mille kilometres de la terre"),
]


@pytest.mark.parametrize('locale, client_id, sentence, expected', PREPROCESSOR_CASES)
def test_preprocessor(locale, client_id, sentence, expected):
    """Check that the locale preprocessor turns *sentence* into *expected*.

    The preprocessor is looked up on the ``preprocessors`` module under the
    locale name with any '-' removed, and is fed the output of
    ``preprocessors.common``.
    """
    preprocess = getattr(preprocessors, locale.replace('-', ''))
    cleaned = preprocessors.common(sentence)
    assert expected == preprocess(client_id, cleaned)