Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor/padacioso #183

Merged
merged 5 commits into from
Aug 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion mycroft/configuration/mycroft.conf
Original file line number Diff line number Diff line change
Expand Up @@ -483,7 +483,9 @@
"intent_cache": "~/.local/share/mycroft/intent_cache",
"train_delay": 4,
"single_thread": false,
"padaos_only": false
// fallback settings for padacioso (pure regex)
"regex_only": false,
"fuzz": true
},

"Audio": {
Expand Down
2 changes: 1 addition & 1 deletion mycroft/skills/intent_service_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def register_padatious_intent(self, intent_name, filename, lang):
if not isinstance(filename, str):
raise ValueError('Filename path must be a string')
if not exists(filename):
raise FileNotFoundError('Unable to find "{}"'.format(filename))
raise FileNotFoundError(f'Unable to find "{filename}"')

data = {'file_name': filename,
'name': intent_name,
Expand Down
117 changes: 73 additions & 44 deletions mycroft/skills/intent_services/padatious_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
# limitations under the License.
#
"""Intent service wrapping padatious."""
from functools import lru_cache
from subprocess import call
from threading import Event
from time import time as get_time, sleep
Expand All @@ -26,7 +25,43 @@
from mycroft.util.log import LOG
from mycroft.skills.intent_services.base import IntentMatch

from padaos import IntentContainer as PadaosIntentContainer
from padacioso import IntentContainer as FallbackIntentContainer

try:
    import padatious as _pd
    from padatious.match_data import MatchData as PadatiousIntent
except ImportError:
    # padatious is optional: flag it as missing and provide a compat class
    # so the rest of the module can still build intent match objects
    _pd = None

    class PadatiousIntent:
        """
        Compat container describing how a query fits into an intent

        Attributes:
            name (str): Name of matched intent
            sent (str): The query after entity extraction
            conf (float): Confidence (from 0.0 to 1.0)
            matches (dict of str -> str): Key is the name of the entity and
                value is the extracted part of the sentence
        """

        def __init__(self, name, sent, matches=None, conf=0.0):
            self.name, self.sent = name, sent
            # a falsy `matches` (None or empty) is replaced by a fresh dict
            self.matches = matches if matches else {}
            self.conf = conf

        def __getitem__(self, item):
            """Look up an extracted entity by name."""
            return self.matches[item]

        def __contains__(self, item):
            """Tell whether an entity with this name was extracted."""
            return item in self.matches

        def get(self, key, default=None):
            """Return the extracted entity `key`, or `default` if absent."""
            return self.matches.get(key, default)

        def __repr__(self):
            return repr(vars(self))


class PadatiousMatcher:
Expand All @@ -53,17 +88,6 @@ def _match_level(self, utterances, limit, lang=None):
for utt in utterances:
for variant in utt:
intent = self.service.calc_intent(variant, lang)
if self.service._padaos:
if not intent.get("name"):
continue
# exact matches only
return IntentMatch(
'Padaos',
intent["name"],
intent["entities"],
intent["name"].split(':')[0]
)

if intent:
best = padatious_intent.conf if padatious_intent else 0.0
if best < intent.conf:
Expand Down Expand Up @@ -114,34 +138,29 @@ def __init__(self, bus, config):
self.padatious_config = config
self.bus = bus
intent_cache = expanduser(self.padatious_config['intent_cache'])
self._padaos = self.padatious_config.get("padaos_only", False)

core_config = Configuration()
self.lang = core_config.get("lang", "en-us")
langs = core_config.get('secondary_langs') or []
if self.lang not in langs:
langs.append(self.lang)

try:
if not self._padaos:
from padatious import IntentContainer
self.containers = {
lang: IntentContainer(path.join(intent_cache, lang))
for lang in langs}
except ImportError:
LOG.error('Padatious not installed. Falling back to Padaos, pure regex alternative')
try:
call(['notify-send', 'Padatious not installed',
'Falling back to Padaos, pure regex alternative'])
except OSError:
pass
self._padaos = True

if self._padaos:
LOG.warning('using padaos instead of padatious. Some intents may '
'be hard to trigger')
self.containers = {lang: PadaosIntentContainer()
if self.is_regex_only:
if not _pd:
LOG.error('Padatious not installed. Falling back to pure regex alternative')
try:
call(['notify-send', 'Padatious not installed',
'Falling back to pure regex alternative'])
except OSError:
pass
LOG.warning('using pure regex intent parser. '
'Some intents may be hard to trigger')
self.containers = {lang: FallbackIntentContainer(self.padatious_config.get("fuzz"))
for lang in langs}
else:
self.containers = {
lang: _pd.IntentContainer(path.join(intent_cache, lang))
for lang in langs}

self.bus.on('padatious:register_intent', self.register_intent)
self.bus.on('padatious:register_entity', self.register_entity)
Expand All @@ -158,25 +177,30 @@ def __init__(self, bus, config):
self.registered_intents = []
self.registered_entities = []

@property
def is_regex_only(self):
    """Whether the pure regex fallback parser is used instead of padatious.

    Always True when the padatious import failed (``_pd`` is falsy);
    otherwise follows the "regex_only" config entry, with any falsy or
    missing value reported as False.
    """
    if _pd:
        return self.padatious_config.get("regex_only") or False
    return True

def train(self, message=None):
"""Perform padatious training.

Args:
message (Message): optional triggering message
"""
self.finished_training_event.clear()
if not self._padaos:
if not self.is_regex_only:
padatious_single_thread = self.padatious_config['single_thread']
if message is None:
single_thread = padatious_single_thread
else:
single_thread = message.data.get('single_thread',
padatious_single_thread)
LOG.info('Training... (single_thread={})'.format(single_thread))
for lang in self.containers:
self.containers[lang].train(single_thread=single_thread)
LOG.info('Training complete.')

LOG.info('Training complete.')
self.finished_training_event.set()
if not self.finished_initial_train:
self.bus.emit(Message('mycroft.skills.trained'))
Expand Down Expand Up @@ -241,7 +265,7 @@ def _register_object(self, message, object_name, register_func):
LOG.warning('Could not find file ' + file_name)
return

if self._padaos:
if self.is_regex_only:
# the regex fallback (padacioso) does not accept a file path like padatious
with open(file_name) as f:
samples = [l.strip() for l in f.readlines()]
Expand All @@ -258,9 +282,10 @@ def register_intent(self, message):
message (Message): message triggering action
"""
lang = message.data.get('lang', self.lang)
lang = lang.lower()
if lang in self.containers:
self.registered_intents.append(message.data['name'])
if self._padaos:
if self.is_regex_only:
self._register_object(
message, 'intent', self.containers[lang].add_intent)
else:
Expand All @@ -274,9 +299,10 @@ def register_entity(self, message):
message (Message): message triggering action
"""
lang = message.data.get('lang', self.lang)
lang = lang.lower()
if lang in self.containers:
self.registered_entities.append(message.data)
if self._padaos:
if self.is_regex_only:
self._register_object(
message, 'intent', self.containers[lang].add_entity)
else:
Expand All @@ -289,13 +315,16 @@ def calc_intent(self, utt, lang=None):
This improves speed when called multiple times for different confidence
levels.

NOTE: This cache will keep a reference to this class
(PadatiousService), but we can live with that since it is used as a
singleton.

Args:
utt (str): utterance to calculate best intent for
"""
lang = lang or self.lang
lang = lang.lower()
if lang in self.containers:
return self.containers[lang].calc_intent(utt)
intent = self.containers[lang].calc_intent(utt)
if isinstance(intent, dict):
if "entities" in intent:
intent["matches"] = intent.pop("entities")
intent["sent"] = utt
intent = PadatiousIntent(**intent)
return intent
2 changes: 1 addition & 1 deletion requirements/extra-skills-lgpl.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
adapt-parser~=0.5
padaos~=0.1
padacioso~=0.1.2
ovos-lingua-franca~=0.4, >=0.4.2
PyYAML~=5.4
ovos_workshop~=0.0, >=0.0.7a9
Expand Down
2 changes: 1 addition & 1 deletion requirements/extra-skills.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
adapt-parser~=0.5
padaos~=0.1
padacioso~=0.1.2
ovos-lingua-franca~=0.4, >=0.4.2
PyYAML~=5.4
ovos_workshop~=0.0, >=0.0.7a9
59 changes: 59 additions & 0 deletions test/unittests/skills/test_intent_service_interface.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import unittest

from adapt.intent import IntentBuilder
from mycroft.skills.intent_service_interface import IntentServiceInterface


Expand Down Expand Up @@ -84,3 +85,61 @@ def test_register_regex(self):
intent_service = IntentServiceInterface(self.emitter)
intent_service.register_adapt_regex('.*', lang="en-us")
self.check_emitter([{'regex': '.*', 'lang': 'en-us'}])


class KeywordIntentRegistrationTest(unittest.TestCase):
    """Checks the message emitted when an adapt intent is registered."""

    def setUp(self):
        self.emitter = MockEmitter()

    def check_emitter(self, expected_message_data):
        """Verify that the registration messages matches the expected."""
        def item_order(data):
            # dicts are not orderable, so compare them by their sorted items
            return sorted(data.items())

        for msg_type in self.emitter.get_types():
            self.assertEqual(msg_type, 'register_intent')

        emitted = sorted(self.emitter.get_results(), key=item_order)
        expected = sorted(expected_message_data, key=item_order)
        self.assertEqual(emitted, expected)
        self.emitter.reset()

    def test_register_intent(self):
        intent_service = IntentServiceInterface(self.emitter)
        for keyword in ('testA', 'testB'):
            intent_service.register_adapt_keyword(keyword, keyword,
                                                  lang='en-US')
        # discard the keyword registration messages, only the intent
        # registration is checked below
        self.emitter.reset()

        intent = IntentBuilder("test").require("testA").optionally("testB")
        intent_service.register_adapt_intent("test", intent)
        self.check_emitter([{
            'at_least_one': [],
            'name': 'test',
            'optional': [('testB', 'testB')],
            'requires': [('testA', 'testA')],
        }])



class UtteranceIntentRegistrationTest(unittest.TestCase):
    """Checks the message emitted when a padatious intent is registered."""

    def check_emitter(self, expected_message_data):
        """Verify that the registration messages matches the expected."""
        for msg_type in self.emitter.get_types():
            self.assertEqual(msg_type, 'padatious:register_intent')

        self.assertEqual(
            sorted(self.emitter.get_results(),
                   key=lambda d: sorted(d.items())),
            sorted(expected_message_data, key=lambda d: sorted(d.items())))
        self.emitter.reset()

    def setUp(self):
        self.emitter = MockEmitter()

    def test_register_intent(self):
        """Registering an intent file emits its path, name and lang."""
        # function-scope imports keep this test self-contained
        import tempfile
        from os.path import join

        intent_service = IntentServiceInterface(self.emitter)
        # use a private temporary directory instead of a fixed /tmp path:
        # portable, removed afterwards, and safe when tests run in parallel
        with tempfile.TemporaryDirectory() as tmp_dir:
            filename = join(tmp_dir, "test.intent")
            with open(filename, "w") as f:
                f.write("this is a test\ntest the intent")

            # the file must exist at registration time
            intent_service.register_padatious_intent('test', filename,
                                                     lang='en-US')

        expected_data = {'file_name': filename, 'lang': 'en-US',
                         'name': 'test'}
        self.check_emitter([expected_data])

Loading