Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

std lang codes #13

Merged
merged 5 commits into from
Oct 16, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions ovos_padatious/intent_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import inspect
import json
import os
import time

from ovos_padatious import padaos
import sys
Expand Down Expand Up @@ -249,6 +250,7 @@ def train(self, debug=True, force=False, single_thread=False, timeout=20):
timeout=timeout
), daemon=True)
self.train_thread.start()
time.sleep(0.5)
self.train_thread.join(timeout)

JarbasAl marked this conversation as resolved.
Show resolved Hide resolved
self.must_train = False
Expand Down
46 changes: 32 additions & 14 deletions ovos_padatious/opm.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,11 @@
from ovos_padatious import IntentContainer as PadatiousIntentContainer
from ovos_padatious.match_data import MatchData as PadatiousIntent
from ovos_utils import flatten_list
from ovos_utils.lang import standardize_lang_tag
from ovos_utils.log import LOG
from ovos_utils.xdg_utils import xdg_data_home
from ovos_plugin_manager.templates.pipeline import PipelinePlugin, IntentMatch

from langcodes import closest_match

class PadatiousMatcher:
"""Matcher class to avoid redundancy in padatious intent matching."""
Expand All @@ -47,7 +48,7 @@ def _match_level(self, utterances, limit, lang=None, message: Optional[Message]
LOG.debug(f'Padatious Matching confidence > {limit}')
# call flatten in case someone is sending the old style list of tuples
utterances = flatten_list(utterances)
lang = lang or self.service.lang
lang = standardize_lang_tag(lang or self.service.lang)
padatious_intent = self.service.calc_intent(utterances, lang, message)
if padatious_intent is not None and padatious_intent.conf > limit:
skill_id = padatious_intent.name.split(':')[0]
Expand Down Expand Up @@ -92,8 +93,9 @@ def __init__(self, bus, config):
self.bus = bus

core_config = Configuration()
self.lang = core_config.get("lang", "en-us")
self.lang = standardize_lang_tag(core_config.get("lang", "en-US"))
langs = core_config.get('secondary_langs') or []
langs = [standardize_lang_tag(l) for l in langs]
JarbasAl marked this conversation as resolved.
Show resolved Hide resolved
if self.lang not in langs:
langs.append(self.lang)

Expand Down Expand Up @@ -211,7 +213,7 @@ def register_intent(self, message):
message (Message): message triggering action
"""
lang = message.data.get('lang', self.lang)
lang = lang.lower()
lang = standardize_lang_tag(lang)
if lang in self.containers:
self.registered_intents.append(message.data['name'])
self._register_object(message, 'intent', self.containers[lang].add_intent)
Expand All @@ -223,7 +225,7 @@ def register_entity(self, message):
message (Message): message triggering action
"""
lang = message.data.get('lang', self.lang)
lang = lang.lower()
lang = standardize_lang_tag(lang)
if lang in self.containers:
self.registered_entities.append(message.data)
self._register_object(message, 'entity',
Expand All @@ -247,16 +249,32 @@ def calc_intent(self, utterances: List[str], lang: str = None,
return None

lang = lang or self.lang
lang = lang.lower()

lang = self._get_closest_lang(lang)
if lang is None: # no intents registered for this lang
return None

sess = SessionManager.get(message)
if lang in self.containers:
intent_container = self.containers.get(lang)
intents = [_calc_padatious_intent(utt, intent_container, sess)
for utt in utterances]
intents = [i for i in intents if i is not None]
# select best
if intents:
return max(intents, key=lambda k: k.conf)

intent_container = self.containers.get(lang)
intents = [_calc_padatious_intent(utt, intent_container, sess)
for utt in utterances]
JarbasAl marked this conversation as resolved.
Show resolved Hide resolved
intents = [i for i in intents if i is not None]
# select best
if intents:
return max(intents, key=lambda k: k.conf)

def _get_closest_lang(self, lang: str) -> Optional[str]:
if self.containers:
lang = standardize_lang_tag(lang)
closest, score = closest_match(lang, list(self.containers.keys()))
# https://langcodes-hickford.readthedocs.io/en/sphinx/index.html#distance-values
# 0 -> These codes represent the same language, possibly after filling in values and normalizing.
# 1- 3 -> These codes indicate a minor regional difference.
# 4 - 10 -> These codes indicate a significant but unproblematic regional difference.
if score < 10:
return closest
return None

def shutdown(self):
self.bus.remove('padatious:register_intent', self.register_intent)
Expand Down
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
fann2>=1.0.7, < 1.1.0
xxhash
ovos-plugin-manager>=0.0.26
ovos-workshop>=0.1.7,<2.0.0
ovos-workshop>=0.1.7,<2.0.0
ovos-utils>=0.3.4,<1.0.0
langcodes
45 changes: 16 additions & 29 deletions tests/test_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,13 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from time import monotonic
import unittest
import os
import pytest
import random
from os import mkdir
from os.path import isdir, join
from shutil import rmtree
import unittest
from os.path import join
from time import monotonic

import pytest

from ovos_padatious.intent_container import IntentContainer

Expand All @@ -34,7 +33,7 @@ class TestFromDisk(unittest.TestCase):
other_entities = ['else\n', 'different\n']

def setUp(self):
self.cont = IntentContainer('temp')
self.cont = IntentContainer('/tmp/cache2')

def _add_intent(self):
self.cont.add_intent('test', self.test_lines)
Expand All @@ -45,23 +44,19 @@ def _add_intent(self):
self._write_train_data()

def _write_train_data(self):

if not isdir('temp'):
mkdir('temp')

fn1 = join('temp', 'test.intent')
fn1 = join('/tmp/cache2', 'test.intent')
with open(fn1, 'w') as f:
f.writelines(self.test_lines_with_entities)

fn2 = join('temp', 'other.intent')
fn2 = join('/tmp/cache2', 'other.intent')
with open(fn2, 'w') as f:
f.writelines(self.other_lines_with_entities)

fn1 = join('temp', 'test.entity')
fn1 = join('/tmp/cache2', 'test.entity')
with open(fn1, 'w') as f:
f.writelines(self.test_entities)

fn2 = join('temp', 'other.entity')
fn2 = join('/tmp/cache2', 'other.entity')
with open(fn2, 'w') as f:
f.writelines(self.other_entities)

Expand All @@ -70,7 +65,7 @@ def test_instantiate_from_disk(self):
self._add_intent()

# instantiate from disk (load cached files)
cont = IntentContainer('temp')
cont = IntentContainer('/tmp/cache2')
cont.instantiate_from_disk()

assert len(cont.intents.train_data.sent_lists) == 0
Expand All @@ -92,21 +87,18 @@ class TestIntentContainer(unittest.TestCase):
other_entities = ['else\n', 'different\n']

def setUp(self):
self.cont = IntentContainer('temp')
self.cont = IntentContainer('/tmp/cache')

def _add_intent(self):
self.cont.add_intent('test', self.test_lines)
self.cont.add_intent('other', self.other_lines)

def test_load_intent(self):
if not isdir('temp'):
mkdir('temp')

fn1 = join('temp', 'test.txt')
fn1 = join('/tmp', 'test.txt')
with open(fn1, 'w') as f:
f.writelines(self.test_lines)

fn2 = join('temp', 'other.txt')
fn2 = join('/tmp', 'other.txt')
with open(fn2, 'w') as f:
f.writelines(self.other_lines)

Expand All @@ -122,7 +114,6 @@ def test(a, b):
test(False, False)
test(True, True)


def _create_large_intent(self, depth):
if depth == 0:
return '(a|b|)'
Expand Down Expand Up @@ -183,8 +174,8 @@ def test_calc_intents(self):

intents = self.cont.calc_intents('this is another test')
assert (
intents[0].conf > intents[1].conf) == (
intents[0].name == 'test')
intents[0].conf > intents[1].conf) == (
intents[0].name == 'test')
assert self.cont.calc_intent('this is another test').name == 'test'

def test_empty(self):
Expand Down Expand Up @@ -252,7 +243,3 @@ def test_generalize(self):
intent = self.cont.calc_intent('make a timer for 3 minute')
assert intent.name == 'timer'
assert intent.matches == {'time': '3'}

def teardown(self):
if isdir('temp'):
rmtree('temp')
Loading