Support external pronunciation lexicons

rhasspy · Dec 6, 2021 · 6226382 · 6226382
1 parent 8e81436
commit 6226382
Show file tree

Hide file tree

Showing 6 changed files with 128 additions and 11 deletions.
diff --git a/CHANGELOG b/CHANGELOG
@@ -3,10 +3,12 @@
 ### Added
 
 - Support for Luxembourgish (credit: mbarnig)
+- External pronunciation lexicons with <lexicon uri="...">
 
 ### Fixed
 
 - Choose first word pronunciation when all pronunciations have roles instead of last
+- Word role is on <lexeme> instead of <grapheme>
 
 ## [2.1.0] - 2021 Nov 10
 

diff --git a/README.md b/README.md
@@ -324,12 +324,14 @@ A subset of [SSML](https://www.w3.org/TR/speech-synthesis11/) is supported:
 * `<sub alias="">` - substitute `alias` for inner text
 * `<phoneme ph="...">` - supply phonemes for inner text
     * `ph` - phonemes for each word of inner text, separated by whitespace
-* `<lexicon id="...">` - inline pronunciation lexicon
+* `<lexicon id="...">` - inline or external pronunciation lexicon
     * `id` - unique id of lexicon (used in `<lookup ref="...">`)
+    * `uri` - if empty or missing, lexicon is inline
     * One or more `<lexeme>` child elements with:
-        * `<grapheme role="...">WORD</grapheme>` - word text (optional [role][#word-roles])
+        * Optional `role="..."` ([word roles](#word-roles) separated by whitespace)
+        * `<grapheme>WORD</grapheme>` - word text
         * `<phoneme>P H O N E M E S</phoneme>` - word pronunciation (phonemes separated by whitespace)
-* `<lookup ref="...">` - use inline pronunciation lexicon for child elements
+* `<lookup ref="...">` - use pronunciation lexicon for child elements
     * `ref` - id from a `<lexicon id="...">`
 
 #### Word Roles

diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -251,6 +251,15 @@ A subset of `the SSML standard <https://www.w3.org/TR/speech-synthesis11/>`_ is
 * ``<sub alias="">`` - substitute ``alias`` for inner text
 * ``<phoneme ph="...">`` - supply phonemes for inner text
     * ``ph`` - phonemes for each word of inner text, separated by whitespace
+* ``<lexicon id="...">`` - inline or external pronunciation lexicon
+    * ``id`` - unique id of lexicon (used in ``<lookup ref="...">``)
+    * ``uri`` - if empty or missing, lexicon is inline
+    * One or more ``<lexeme>`` child elements with:
+        * Optional ``role="..."`` (word roles separated by whitespace)
+        * ``<grapheme>WORD</grapheme>`` - word text
+        * ``<phoneme>P H O N E M E S</phoneme>`` - word pronunciation (phonemes separated by whitespace)
+* ``<lookup ref="...">`` - use pronunciation lexicon for child elements
+    * ``ref`` - id from a ``<lexicon id="...">``
 
 
 .. _database:

diff --git a/gruut/text_processor.py b/gruut/text_processor.py
@@ -46,6 +46,7 @@
 from gruut.utils import (
     attrib_no_namespace,
     leaves,
+    load_lexicon,
     maybe_split_ipa,
     pipeline_split,
     pipeline_transform,
@@ -951,28 +952,43 @@ def in_inline_lexicon(
                     lookup_stack.append(lookup_id)
                 elif elem_tag == "lexicon":
                     # Inline pronunciaton lexicon
-                    # NOTE: Empty lexicon id means the "default" inline lexicon (lookup not required)
+                    # NOTE: Empty lexicon id means the "default" inline lexicon (<lookup> not required)
                     lexicon_id = attrib_no_namespace(elem, "id", DEFAULT_LEXICON_ID)
                     assert lexicon_id is not None
 
-                    parsing_state = SSMLParsingState.IN_LEXICON
                     lexicon_alphabet = (
                         attrib_no_namespace(elem, "alphabet", "").strip().lower()
                     )
                     inline_lexicons[lexicon_id] = InlineLexicon(
                         lexicon_id=lexicon_id, alphabet=lexicon_alphabet
                     )
-                elif (elem_tag == "grapheme") and (
+
+                    lexicon_uri = attrib_no_namespace(elem, "uri", "")
+                    if lexicon_uri:
+                        # Lexicon defined externally
+                        _LOGGER.debug(
+                            "Loading pronunciation lexicon from %s", lexicon_uri
+                        )
+                        load_lexicon(lexicon_uri, inline_lexicons[lexicon_id])
+                    else:
+                        # Lexicon defined within this document
+                        parsing_state = SSMLParsingState.IN_LEXICON
+                elif (elem_tag == "lexeme") and (
                     parsing_state == SSMLParsingState.IN_LEXICON
                 ):
-                    # Inline pronunciaton lexicon (grapheme)
-                    parsing_state = SSMLParsingState.IN_LEXICON_GRAPHEME
                     if lexeme is None:
                         lexeme = Lexeme()
 
                     role_str = attrib_no_namespace(elem, "role")
                     if role_str:
                         lexeme.roles = set(role_str.strip().split())
+                elif (elem_tag == "grapheme") and (
+                    parsing_state == SSMLParsingState.IN_LEXICON
+                ):
+                    # Inline pronunciation lexicon (grapheme)
+                    parsing_state = SSMLParsingState.IN_LEXICON_GRAPHEME
+                    if lexeme is None:
+                        lexeme = Lexeme()
                 elif (elem_tag == "phoneme") and (
                     parsing_state == SSMLParsingState.IN_LEXICON
                 ):

diff --git a/gruut/utils.py b/gruut/utils.py
@@ -3,14 +3,26 @@
 import logging
 import os
 import re
+import ssl
 import typing
 import xml.etree.ElementTree as etree
 from pathlib import Path
+from urllib.request import urlopen
 
 import networkx as nx
 from gruut_ipa import IPA
 
-from gruut.const import DATA_PROP, LANG_ALIASES, NODE_TYPE, EndElement, GraphType, Node
+from gruut.const import (
+    DATA_PROP,
+    LANG_ALIASES,
+    NODE_TYPE,
+    EndElement,
+    GraphType,
+    InlineLexicon,
+    Lexeme,
+    Node,
+    WordRole,
+)
 
 _DIR = Path(__file__).parent
 _LOGGER = logging.getLogger("gruut.utils")
@@ -215,6 +227,50 @@ def text_and_elements(element, is_last=False):
         yield tail
 
 
+def load_lexicon(
+    uri: str,
+    lexicon: InlineLexicon,
+    ssl_context: typing.Optional[ssl.SSLContext] = None,
+):
+    """Loads a pronunciation lexicon from a URI"""
+    if ssl_context is None:
+        ssl_context = ssl.create_default_context()
+    # NOTE(review): uri is opened as given (the test passes a file:// URI) -- confirm callers only pass trusted URIs
+    with urlopen(uri, context=ssl_context) as response:
+        tree = etree.parse(response)
+        for lexeme_elem in tree.getroot():
+            if tag_no_namespace(lexeme_elem.tag) != "lexeme":
+                continue
+            # One Lexeme is built per <lexeme> child of the root; other children are skipped above
+            lexeme = Lexeme()
+            # Optional role attribute: role names separated by whitespace
+            role_str = attrib_no_namespace(lexeme_elem, "role")
+            if role_str:
+                lexeme.roles = set(role_str.strip().split())
+            # Read the word text and its pronunciation from the child elements
+            for lexeme_child in lexeme_elem:
+                # Children with no text are ignored; a later <grapheme>/<phoneme> overwrites an earlier one
+                child_tag = tag_no_namespace(lexeme_child.tag)
+                if child_tag == "grapheme":
+                    if lexeme_child.text:
+                        lexeme.grapheme = lexeme_child.text.strip()
+                elif child_tag == "phoneme":
+                    if lexeme_child.text:
+                        lexeme.phonemes = maybe_split_ipa(lexeme_child.text.strip())
+            # Entries missing either a grapheme or phonemes are not added to the lexicon
+            if lexeme.grapheme and lexeme.phonemes:
+                role_phonemes = lexicon.words.get(lexeme.grapheme)
+                if role_phonemes is None:
+                    role_phonemes = {}
+                    lexicon.words[lexeme.grapheme] = role_phonemes
+
+                assert role_phonemes is not None
+                # With no roles the pronunciation is stored under WordRole.DEFAULT
+                roles = lexeme.roles or [WordRole.DEFAULT]
+                for role in roles:
+                    role_phonemes[role] = lexeme.phonemes
+
+
 # -----------------------------------------------------------------------------
 # Text
 # -----------------------------------------------------------------------------

diff --git a/tests/test_ssml.py b/tests/test_ssml.py
@@ -2,10 +2,13 @@
 """Tests for SSML"""
 import sys
 import unittest
+from pathlib import Path
 
 from gruut import sentences
 from gruut.utils import print_graph
 
+_DIR = Path(__file__).parent
+
 
 class SSMLTestCase(unittest.TestCase):
     """Test cases for SSML"""
@@ -321,8 +324,8 @@ def test_lexicon(self):
         t ə m ˈɑ t oʊ
       </phoneme>
     </lexeme>
-    <lexeme>
-      <grapheme role="fake-role">
+    <lexeme role="fake-role">
+      <grapheme>
         tomato
       </grapheme>
       <phoneme>
@@ -354,6 +357,35 @@ def test_lexicon(self):
             ],
         )
 
+    def test_lexicon_external(self):
+        """Test <lexicon> from URI"""
+        lexicon_path = (_DIR.parent / "etc" / "sample_lexicon.xml").absolute()
+        # The document below points <lexicon uri="..."> at that file through a file:// URI
+        text = f"""<?xml version="1.0"?>
+<speak version="1.1"
+       xmlns="http://www.w3.org/2001/10/synthesis"
+       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+       xsi:schemaLocation="http://www.w3.org/2001/10/synthesis
+                 http://www.w3.org/TR/speech-synthesis11/synthesis.xsd"
+       xml:lang="en-US">
+
+  <lexicon xml:id="test" alphabet="ipa" uri="file://{lexicon_path}" />
+
+  <lookup ref="test">
+    <w>tomato</w>
+  </lookup>
+</speak>"""
+        # Flatten every word into (sentence index, word index, phonemes)
+        results = [
+            (w.sent_idx, w.idx, w.phonemes)
+            for sent in sentences(text, ssml=True)
+            for w in sent
+        ]
+        # One word expected; presumably these phonemes come from etc/sample_lexicon.xml (file not shown here)
+        self.assertEqual(
+            results, [(0, 0, ["t", "ə", "m", "e", "i̥", "ɾ", "o", "u̥"])],
+        )
+
 
 def print_graph_stderr(graph, root):
     """Print graph to stderr"""