From 8b46ed81bf749df38362bba3465e4fec918728e5 Mon Sep 17 00:00:00 2001 From: miro Date: Wed, 16 Oct 2024 00:33:51 +0100 Subject: [PATCH 1/5] fix:standardize_lang --- ovos_padatious/opm.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/ovos_padatious/opm.py b/ovos_padatious/opm.py index 39618ab..cbcdb5d 100644 --- a/ovos_padatious/opm.py +++ b/ovos_padatious/opm.py @@ -25,6 +25,7 @@ from ovos_padatious import IntentContainer as PadatiousIntentContainer from ovos_padatious.match_data import MatchData as PadatiousIntent from ovos_utils import flatten_list +from ovos_utils.lang import standardize_lang_tag from ovos_utils.log import LOG from ovos_utils.xdg_utils import xdg_data_home from ovos_plugin_manager.templates.pipeline import PipelinePlugin, IntentMatch @@ -47,7 +48,7 @@ def _match_level(self, utterances, limit, lang=None, message: Optional[Message] LOG.debug(f'Padatious Matching confidence > {limit}') # call flatten in case someone is sending the old style list of tuples utterances = flatten_list(utterances) - lang = lang or self.service.lang + lang = standardize_lang_tag(lang or self.service.lang) padatious_intent = self.service.calc_intent(utterances, lang, message) if padatious_intent is not None and padatious_intent.conf > limit: skill_id = padatious_intent.name.split(':')[0] @@ -92,8 +93,9 @@ def __init__(self, bus, config): self.bus = bus core_config = Configuration() - self.lang = core_config.get("lang", "en-us") + self.lang = standardize_lang_tag(core_config.get("lang", "en-US")) langs = core_config.get('secondary_langs') or [] + langs = [standardize_lang_tag(l) for l in langs] if self.lang not in langs: langs.append(self.lang) @@ -211,7 +213,7 @@ def register_intent(self, message): message (Message): message triggering action """ lang = message.data.get('lang', self.lang) - lang = lang.lower() + lang = standardize_lang_tag(lang) if lang in self.containers: self.registered_intents.append(message.data['name']) self._register_object(message, 'intent', self.containers[lang].add_intent) @@ -223,7 +225,7 @@ def register_entity(self, message): message (Message): message triggering action """ lang = message.data.get('lang', self.lang) - lang = lang.lower() + lang = standardize_lang_tag(lang) if lang in self.containers: self.registered_entities.append(message.data) self._register_object(message, 'entity', @@ -247,8 +249,9 @@ def calc_intent(self, utterances: List[str], lang: str = None, return None lang = lang or self.lang - lang = lang.lower() + lang = standardize_lang_tag(lang) sess = SessionManager.get(message) + # TODO - allow close langs, match dialects if lang in self.containers: intent_container = self.containers.get(lang) intents = [_calc_padatious_intent(utt, intent_container, sess) From 0e7a2db26e09e04ea60db3a7bbf1f76f52c951e0 Mon Sep 17 00:00:00 2001 From: miro Date: Wed, 16 Oct 2024 01:00:32 +0100 Subject: [PATCH 2/5] fix:standardize_lang --- ovos_padatious/intent_container.py | 2 ++ ovos_padatious/opm.py | 37 +++++++++++++++++++++--------- requirements.txt | 3 ++- 3 files changed, 30 insertions(+), 12 deletions(-) diff --git a/ovos_padatious/intent_container.py b/ovos_padatious/intent_container.py index 5714d55..abd0e1f 100644 --- a/ovos_padatious/intent_container.py +++ b/ovos_padatious/intent_container.py @@ -14,6 +14,7 @@ import inspect import json import os +import time from ovos_padatious import padaos import sys @@ -249,6 +250,7 @@ def train(self, debug=True, force=False, single_thread=False, timeout=20): timeout=timeout ), daemon=True) self.train_thread.start() + time.sleep(0.5) self.train_thread.join(timeout) self.must_train = False diff --git a/ovos_padatious/opm.py b/ovos_padatious/opm.py index cbcdb5d..5a77340 100644 --- a/ovos_padatious/opm.py +++ b/ovos_padatious/opm.py @@ -29,7 +29,7 @@ from ovos_utils.log import LOG from ovos_utils.xdg_utils import xdg_data_home from ovos_plugin_manager.templates.pipeline import PipelinePlugin, IntentMatch - +from langcodes import closest_match class PadatiousMatcher: """Matcher class to avoid redundancy in padatious intent matching.""" @@ -249,17 +249,32 @@ def calc_intent(self, utterances: List[str], lang: str = None, return None lang = lang or self.lang - lang = standardize_lang_tag(lang) + + lang = self._get_closest_lang(lang) + if lang is None: # no intents registered for this lang + return None + sess = SessionManager.get(message) - # TODO - allow close langs, match dialects - if lang in self.containers: - intent_container = self.containers.get(lang) - intents = [_calc_padatious_intent(utt, intent_container, sess) - for utt in utterances] - intents = [i for i in intents if i is not None] - # select best - if intents: - return max(intents, key=lambda k: k.conf) + + intent_container = self.containers.get(lang) + intents = [_calc_padatious_intent(utt, intent_container, sess) + for utt in utterances] + intents = [i for i in intents if i is not None] + # select best + if intents: + return max(intents, key=lambda k: k.conf) + + def _get_closest_lang(self, lang: str) -> Optional[str]: + if self.containers: + lang = standardize_lang_tag(lang) + closest, score = closest_match(lang, list(self.containers.keys())) + # https://langcodes-hickford.readthedocs.io/en/sphinx/index.html#distance-values + # 0 -> These codes represent the same language, possibly after filling in values and normalizing. + # 1- 3 -> These codes indicate a minor regional difference. + # 4 - 10 -> These codes indicate a significant but unproblematic regional difference. + if score < 10: + return closest + return None def shutdown(self): self.bus.remove('padatious:register_intent', self.register_intent) diff --git a/requirements.txt b/requirements.txt index 0c84e7c..4e94aaf 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ fann2>=1.0.7, < 1.1.0 xxhash ovos-plugin-manager>=0.0.26 -ovos-workshop>=0.1.7,<2.0.0 \ No newline at end of file +ovos-workshop>=0.1.7,<2.0.0 +langcodes \ No newline at end of file From 37a920bf8fb736cb8c840d7c5b99a7f282c60711 Mon Sep 17 00:00:00 2001 From: miro Date: Wed, 16 Oct 2024 01:11:35 +0100 Subject: [PATCH 3/5] fix:standardize_lang --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 4e94aaf..ae04446 100755 --- a/requirements.txt +++ b/requirements.txt @@ -2,4 +2,5 @@ fann2>=1.0.7, < 1.1.0 xxhash ovos-plugin-manager>=0.0.26 ovos-workshop>=0.1.7,<2.0.0 +ovos-utils>=0.3.4,<1.0.0 langcodes \ No newline at end of file From 3d588155ef8ac37f62ab72b22a6b8176b5f7fbe8 Mon Sep 17 00:00:00 2001 From: miro Date: Wed, 16 Oct 2024 02:15:50 +0100 Subject: [PATCH 4/5] fix:standardize_lang --- tests/test_container.py | 45 +++++++++++++++-------------------------- 1 file changed, 16 insertions(+), 29 deletions(-) diff --git a/tests/test_container.py b/tests/test_container.py index 57f5e20..cd99eb5 100644 --- a/tests/test_container.py +++ b/tests/test_container.py @@ -11,14 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from time import monotonic -import unittest import os -import pytest import random -from os import mkdir -from os.path import isdir, join -from shutil import rmtree +import unittest +from os.path import join +from time import monotonic + +import pytest from ovos_padatious.intent_container import IntentContainer @@ -34,7 +33,7 @@ class TestFromDisk(unittest.TestCase): other_entities = ['else\n', 'different\n'] def setUp(self): - self.cont = IntentContainer('temp') + self.cont = IntentContainer('/tmp/cache2') def _add_intent(self): self.cont.add_intent('test', self.test_lines) @@ -45,23 +44,19 @@ def _add_intent(self): self._write_train_data() def _write_train_data(self): - - if not isdir('temp'): - mkdir('temp') - - fn1 = join('temp', 'test.intent') + fn1 = join('/tmp/cache2', 'test.intent') with open(fn1, 'w') as f: f.writelines(self.test_lines_with_entities) - fn2 = join('temp', 'other.intent') + fn2 = join('/tmp/cache2', 'other.intent') with open(fn2, 'w') as f: f.writelines(self.other_lines_with_entities) - fn1 = join('temp', 'test.entity') + fn1 = join('/tmp/cache2', 'test.entity') with open(fn1, 'w') as f: f.writelines(self.test_entities) - fn2 = join('temp', 'other.entity') + fn2 = join('/tmp/cache2', 'other.entity') with open(fn2, 'w') as f: f.writelines(self.other_entities) @@ -70,7 +65,7 @@ def test_instantiate_from_disk(self): self._add_intent() # instantiate from disk (load cached files) - cont = IntentContainer('temp') + cont = IntentContainer('/tmp/cache2') cont.instantiate_from_disk() assert len(cont.intents.train_data.sent_lists) == 0 @@ -92,21 +87,18 @@ class TestIntentContainer(unittest.TestCase): other_entities = ['else\n', 'different\n'] def setUp(self): - self.cont = IntentContainer('temp') + self.cont = IntentContainer('/tmp/cache') def _add_intent(self): self.cont.add_intent('test', self.test_lines) self.cont.add_intent('other', self.other_lines) def test_load_intent(self): - if not isdir('temp'): - mkdir('temp') - - fn1 = join('temp', 'test.txt') + fn1 = join('/tmp', 'test.txt') with open(fn1, 'w') as f: f.writelines(self.test_lines) - fn2 = join('temp', 'other.txt') + fn2 = join('/tmp', 'other.txt') with open(fn2, 'w') as f: f.writelines(self.other_lines) @@ -122,7 +114,6 @@ def test(a, b): test(False, False) test(True, True) - def _create_large_intent(self, depth): if depth == 0: return '(a|b|)' @@ -183,8 +174,8 @@ def test_calc_intents(self): intents = self.cont.calc_intents('this is another test') assert ( - intents[0].conf > intents[1].conf) == ( - intents[0].name == 'test') + intents[0].conf > intents[1].conf) == ( + intents[0].name == 'test') assert self.cont.calc_intent('this is another test').name == 'test' def test_empty(self): @@ -252,7 +243,3 @@ def test_generalize(self): intent = self.cont.calc_intent('make a timer for 3 minute') assert intent.name == 'timer' assert intent.matches == {'time': '3'} - - def teardown(self): - if isdir('temp'): - rmtree('temp') From 7e7f951a484f373ec0bbbb34ffd1348df03e41a3 Mon Sep 17 00:00:00 2001 From: miro Date: Wed, 16 Oct 2024 02:23:48 +0100 Subject: [PATCH 5/5] fix:standardize_lang --- ovos_padatious/intent_container.py | 2 -- ovos_padatious/opm.py | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/ovos_padatious/intent_container.py b/ovos_padatious/intent_container.py index abd0e1f..5714d55 100644 --- a/ovos_padatious/intent_container.py +++ b/ovos_padatious/intent_container.py @@ -14,7 +14,6 @@ import inspect import json import os -import time from ovos_padatious import padaos import sys @@ -250,7 +249,6 @@ def train(self, debug=True, force=False, single_thread=False, timeout=20): timeout=timeout ), daemon=True) self.train_thread.start() - time.sleep(0.5) self.train_thread.join(timeout) self.must_train = False diff --git a/ovos_padatious/opm.py b/ovos_padatious/opm.py index 5a77340..b93fd1b 100644 --- a/ovos_padatious/opm.py +++ b/ovos_padatious/opm.py @@ -31,6 +31,7 @@ from ovos_plugin_manager.templates.pipeline import PipelinePlugin, IntentMatch from langcodes import closest_match + class PadatiousMatcher: """Matcher class to avoid redundancy in padatious intent matching."""