Skip to content

Commit

Permalink
Support external pronunciation lexicons
Browse files Browse the repository at this point in the history
  • Loading branch information
synesthesiam committed Dec 6, 2021
1 parent 8e81436 commit 6226382
Show file tree
Hide file tree
Showing 6 changed files with 128 additions and 11 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,12 @@
### Added

- Support for Luxembourgish (credit: mbarnig)
- External pronunciation lexicons with <lexicon uri="...">

### Fixed

- Choose first word pronunciation when all pronunciations have roles instead of last
- Word role is on <lexeme> instead of <grapheme>

## [2.1.0] - 2021 Nov 10

Expand Down
8 changes: 5 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -324,12 +324,14 @@ A subset of [SSML](https://www.w3.org/TR/speech-synthesis11/) is supported:
* `<sub alias="">` - substitute `alias` for inner text
* `<phoneme ph="...">` - supply phonemes for inner text
* `ph` - phonemes for each word of inner text, separated by whitespace
* `<lexicon id="...">` - inline pronunciation lexicon
* `<lexicon id="...">` - inline or external pronunciation lexicon
* `id` - unique id of lexicon (used in `<lookup ref="...">`)
* `uri` - if empty or missing, lexicon is inline
* One or more `<lexeme>` child elements with:
* `<grapheme role="...">WORD</grapheme>` - word text (optional [role][#word-roles])
* Optional `role="..."` ([word roles](#word-roles) separated by whitespace)
* `<grapheme>WORD</grapheme>` - word text
* `<phoneme>P H O N E M E S</phoneme>` - word pronunciation (phonemes separated by whitespace)
* `<lookup ref="...">` - use inline pronunciation lexicon for child elements
* `<lookup ref="...">` - use pronunciation lexicon for child elements
* `ref` - id from a `<lexicon id="...">`

#### Word Roles
Expand Down
9 changes: 9 additions & 0 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,15 @@ A subset of `the SSML standard <https://www.w3.org/TR/speech-synthesis11/>`_ is
* ``<sub alias="">`` - substitute ``alias`` for inner text
* ``<phoneme ph="...">`` - supply phonemes for inner text
* ``ph`` - phonemes for each word of inner text, separated by whitespace
* ``<lexicon id="...">`` - inline or external pronunciation lexicon
* ``id`` - unique id of lexicon (used in ``<lookup ref="...">``)
* ``uri`` - if empty or missing, lexicon is inline
* One or more ``<lexeme>`` child elements with:
* Optional ``role="..."`` (word roles separated by whitespace)
* ``<grapheme>WORD</grapheme>`` - word text
* ``<phoneme>P H O N E M E S</phoneme>`` - word pronunciation (phonemes separated by whitespace)
* ``<lookup ref="...">`` - use pronunciation lexicon for child elements
* ``ref`` - id from a ``<lexicon id="...">``


.. _database:
Expand Down
26 changes: 21 additions & 5 deletions gruut/text_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
from gruut.utils import (
attrib_no_namespace,
leaves,
load_lexicon,
maybe_split_ipa,
pipeline_split,
pipeline_transform,
Expand Down Expand Up @@ -951,28 +952,43 @@ def in_inline_lexicon(
lookup_stack.append(lookup_id)
elif elem_tag == "lexicon":
# Inline pronunciation lexicon
# NOTE: Empty lexicon id means the "default" inline lexicon (lookup not required)
# NOTE: Empty lexicon id means the "default" inline lexicon (<lookup> not required)
lexicon_id = attrib_no_namespace(elem, "id", DEFAULT_LEXICON_ID)
assert lexicon_id is not None

parsing_state = SSMLParsingState.IN_LEXICON
lexicon_alphabet = (
attrib_no_namespace(elem, "alphabet", "").strip().lower()
)
inline_lexicons[lexicon_id] = InlineLexicon(
lexicon_id=lexicon_id, alphabet=lexicon_alphabet
)
elif (elem_tag == "grapheme") and (

lexicon_uri = attrib_no_namespace(elem, "uri", "")
if lexicon_uri:
# Lexicon defined externally
_LOGGER.debug(
"Loading pronunciation lexicon from %s", lexicon_uri
)
load_lexicon(lexicon_uri, inline_lexicons[lexicon_id])
else:
# Lexicon defined within this document
parsing_state = SSMLParsingState.IN_LEXICON
elif (elem_tag == "lexeme") and (
parsing_state == SSMLParsingState.IN_LEXICON
):
# Inline pronunciaton lexicon (grapheme)
parsing_state = SSMLParsingState.IN_LEXICON_GRAPHEME
if lexeme is None:
lexeme = Lexeme()

role_str = attrib_no_namespace(elem, "role")
if role_str:
lexeme.roles = set(role_str.strip().split())
elif (elem_tag == "grapheme") and (
parsing_state == SSMLParsingState.IN_LEXICON
):
# Inline pronunciation lexicon (grapheme)
parsing_state = SSMLParsingState.IN_LEXICON_GRAPHEME
if lexeme is None:
lexeme = Lexeme()
elif (elem_tag == "phoneme") and (
parsing_state == SSMLParsingState.IN_LEXICON
):
Expand Down
58 changes: 57 additions & 1 deletion gruut/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,26 @@
import logging
import os
import re
import ssl
import typing
import xml.etree.ElementTree as etree
from pathlib import Path
from urllib.request import urlopen

import networkx as nx
from gruut_ipa import IPA

from gruut.const import DATA_PROP, LANG_ALIASES, NODE_TYPE, EndElement, GraphType, Node
from gruut.const import (
DATA_PROP,
LANG_ALIASES,
NODE_TYPE,
EndElement,
GraphType,
InlineLexicon,
Lexeme,
Node,
WordRole,
)

_DIR = Path(__file__).parent
_LOGGER = logging.getLogger("gruut.utils")
Expand Down Expand Up @@ -215,6 +227,50 @@ def text_and_elements(element, is_last=False):
yield tail


def load_lexicon(
    uri: str,
    lexicon: InlineLexicon,
    ssl_context: typing.Optional[ssl.SSLContext] = None,
):
    """Read a pronunciation lexicon document from ``uri`` into ``lexicon``.

    The document is opened with ``urlopen`` and parsed as XML. Every direct
    child of the root whose tag (ignoring namespace) is ``lexeme`` is
    examined; other children are skipped.

    Args:
        uri: Location of the lexicon XML, passed unchanged to ``urlopen``.
        lexicon: Lexicon whose ``words`` mapping is updated in place.
        ssl_context: Optional SSL context for the request. When ``None``,
            ``ssl.create_default_context()`` is used.

    For each lexeme that ends up with both a grapheme and phonemes, the
    phonemes are stored under ``lexicon.words[grapheme][role]`` for every
    role listed in the lexeme's whitespace-separated ``role`` attribute, or
    under ``WordRole.DEFAULT`` when no role is given. Existing entries for
    the same grapheme and role are overwritten.
    """
    context = ssl.create_default_context() if ssl_context is None else ssl_context

    with urlopen(uri, context=context) as response:
        root = etree.parse(response).getroot()

        for entry_elem in root:
            # Only <lexeme> children carry pronunciations
            if tag_no_namespace(entry_elem.tag) != "lexeme":
                continue

            entry = Lexeme()

            roles_attr = attrib_no_namespace(entry_elem, "role")
            if roles_attr:
                entry.roles = set(roles_attr.strip().split())

            for part in entry_elem:
                part_tag = tag_no_namespace(part.tag)
                part_text = part.text

                if part_tag == "grapheme":
                    if part_text:
                        entry.grapheme = part_text.strip()
                elif part_tag == "phoneme":
                    if part_text:
                        entry.phonemes = maybe_split_ipa(part_text.strip())

            # Incomplete lexemes (missing word text or pronunciation) are dropped
            if not (entry.grapheme and entry.phonemes):
                continue

            by_role = lexicon.words.get(entry.grapheme)
            if by_role is None:
                by_role = lexicon.words[entry.grapheme] = {}

            for role in entry.roles or [WordRole.DEFAULT]:
                by_role[role] = entry.phonemes


# -----------------------------------------------------------------------------
# Text
# -----------------------------------------------------------------------------
Expand Down
36 changes: 34 additions & 2 deletions tests/test_ssml.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,13 @@
"""Tests for SSML"""
import sys
import unittest
from pathlib import Path

from gruut import sentences
from gruut.utils import print_graph

_DIR = Path(__file__).parent


class SSMLTestCase(unittest.TestCase):
"""Test cases for SSML"""
Expand Down Expand Up @@ -321,8 +324,8 @@ def test_lexicon(self):
t ə m ˈɑ t oʊ
</phoneme>
</lexeme>
<lexeme>
<grapheme role="fake-role">
<lexeme role="fake-role">
<grapheme>
tomato
</grapheme>
<phoneme>
Expand Down Expand Up @@ -354,6 +357,35 @@ def test_lexicon(self):
],
)

def test_lexicon_external(self):
"""Test <lexicon> from URI"""
# Absolute path to etc/sample_lexicon.xml, resolved relative to the
# parent of this test file's directory.
lexicon_path = (_DIR.parent / "etc" / "sample_lexicon.xml").absolute()

# The SSML document below references the lexicon through a file:// URI
# built from that path, then looks up "tomato" via <lookup ref="test">.
text = f"""<?xml version="1.0"?>
<speak version="1.1"
xmlns="http://www.w3.org/2001/10/synthesis"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.w3.org/2001/10/synthesis
http://www.w3.org/TR/speech-synthesis11/synthesis.xsd"
xml:lang="en-US">
<lexicon xml:id="test" alphabet="ipa" uri="file://{lexicon_path}" />
<lookup ref="test">
<w>tomato</w>
</lookup>
</speak>"""

# Flatten every word of every sentence into (sentence index, word index,
# phonemes) tuples for comparison.
results = [
(w.sent_idx, w.idx, w.phonemes)
for sent in sentences(text, ssml=True)
for w in sent
]

# NOTE(review): the expected phonemes presumably match the "tomato" entry
# in etc/sample_lexicon.xml, which is not visible here -- confirm.
self.assertEqual(
results, [(0, 0, ["t", "ə", "m", "e", "i̥", "ɾ", "o", "u̥"])],
)


def print_graph_stderr(graph, root):
"""Print graph to stderr"""
Expand Down

0 comments on commit 6226382

Please sign in to comment.