diff --git a/docs/source/changelog/changelog_3.0.rst b/docs/source/changelog/changelog_3.0.rst index ca6d3d1f..3d7016a3 100644 --- a/docs/source/changelog/changelog_3.0.rst +++ b/docs/source/changelog/changelog_3.0.rst @@ -5,6 +5,14 @@ 3.0 Changelog ************* +3.0.6 +===== + +- Fixed an issue where alignment analysis would not produce data for speech log likelihood and phone duration deviation +- Changed phone duration deviation metric to be maximum duration deviation rather than average across all phones in the utterance +- Fixed a crash when an empty phone set was specified in phone groups configuration files +- Fixed a crash when using the :code:`--language` flag with values other than :code:`japanese`, :code:`thai`, :code:`chinese` or :code:`korean` + 3.0.5 ===== diff --git a/montreal_forced_aligner/acoustic_modeling/trainer.py b/montreal_forced_aligner/acoustic_modeling/trainer.py index 7a4d11b6..ab2481b5 100644 --- a/montreal_forced_aligner/acoustic_modeling/trainer.py +++ b/montreal_forced_aligner/acoustic_modeling/trainer.py @@ -625,6 +625,7 @@ def compute_phone_pdf_counts(self) -> None: def finalize_training(self): self.compute_phone_pdf_counts() self.collect_alignments() + self.analyze_alignments() self.train_phone_lm() def export_files( @@ -707,7 +708,6 @@ def align(self) -> None: {"done": True} ) session.commit() - self.analyze_alignments() except Exception as e: with self.session() as session: session.query(CorpusWorkflow).filter(CorpusWorkflow.id == wf.id).update( diff --git a/montreal_forced_aligner/alignment/base.py b/montreal_forced_aligner/alignment/base.py index 30d07385..a17baf5f 100644 --- a/montreal_forced_aligner/alignment/base.py +++ b/montreal_forced_aligner/alignment/base.py @@ -170,6 +170,9 @@ def analyze_alignments(self): if not config.USE_POSTGRES: logger.warning("Alignment analysis not available without using postgresql") return + workflow = self.current_workflow + if not workflow.alignments_collected: + self.collect_alignments() 
logger.info("Analyzing alignment quality...") begin = time.time() with self.session() as session: diff --git a/montreal_forced_aligner/corpus/base.py b/montreal_forced_aligner/corpus/base.py index 80556894..88bea0a8 100644 --- a/montreal_forced_aligner/corpus/base.py +++ b/montreal_forced_aligner/corpus/base.py @@ -614,7 +614,7 @@ def _finalize_load(self, session: Session, import_data: DatabaseImportData): self._num_files = None session.commit() - def normalize_text_arguments(self): + def get_tokenizers(self): from montreal_forced_aligner.dictionary.mixins import DictionaryMixin if self.language is Language.unknown: @@ -631,6 +631,16 @@ def normalize_text_arguments(self): tokenizers = self.tokenizer else: return None + return tokenizers + + def get_tokenizer(self, dictionary_id: int): + tokenizers = self.get_tokenizers() + if not isinstance(tokenizers, dict): + return tokenizers + return tokenizers[dictionary_id] + + def normalize_text_arguments(self): + tokenizers = self.get_tokenizers() from montreal_forced_aligner.corpus.multiprocessing import NormalizeTextArguments with self.session() as session: @@ -642,7 +652,7 @@ def normalize_text_arguments(self): self.split_directory.joinpath("log", f"normalize.{j.id}.log"), tokenizers, getattr(self, "g2p_model", None), - self.ignore_case, + getattr(self, "ignore_case", True), ) for j in jobs ] diff --git a/montreal_forced_aligner/corpus/multiprocessing.py b/montreal_forced_aligner/corpus/multiprocessing.py index 83ea40b3..8905e34c 100644 --- a/montreal_forced_aligner/corpus/multiprocessing.py +++ b/montreal_forced_aligner/corpus/multiprocessing.py @@ -313,7 +313,11 @@ def _run(self): if isinstance(tokenized, tuple): normalized_text, pronunciation_form = tokenized else: - normalized_text, pronunciation_form = tokenized, tokenized + if not isinstance(tokenized, str): + tokenized = " ".join([x.text for x in tokenized]) + if self.ignore_case: + tokenized = tokenized.lower() + normalized_text, pronunciation_form = tokenized, 
tokenized.lower() oovs = set() self.callback( ( @@ -334,7 +338,11 @@ def _run(self): .filter(Utterance.job_id == self.job_name) ) for u_id, u_text in utterances: - normalized_text, normalized_character_text, oovs = tokenizer(u_text) + if tokenizer is None: + normalized_text, normalized_character_text = u_text, u_text + oovs = [] + else: + normalized_text, normalized_character_text, oovs = tokenizer(u_text) self.callback( ( { diff --git a/montreal_forced_aligner/dictionary/mixins.py b/montreal_forced_aligner/dictionary/mixins.py index fb5986c9..2150daca 100644 --- a/montreal_forced_aligner/dictionary/mixins.py +++ b/montreal_forced_aligner/dictionary/mixins.py @@ -18,9 +18,9 @@ if TYPE_CHECKING: from montreal_forced_aligner.abc import MetaDict -DEFAULT_PUNCTUATION = list(r'、。।,?!!@<>→"”()“„–,.:;—¿?¡:)؟!\\&%#*،~【】,…‥「」『』〝〟″⟨⟩♪・‚‘‹›«»~′$+=‘۔') +DEFAULT_PUNCTUATION = list(r'、。।,?!!@<>→"”()“„–,.:;—¿?¡:)|؟!\\&%#*،~【】,…‥「」『』〝〟″⟨⟩♪・‚‘‹›«»~′$+=‘۔') -DEFAULT_WORD_BREAK_MARKERS = list(r'?!!(),,.:;¡¿?“„"”&~%#—…‥、。【】$+=〝〟″‹›«»・⟨⟩،「」『』؟') +DEFAULT_WORD_BREAK_MARKERS = list(r'?!!(),,.:;¡¿?“„"”&~%#—…‥、。|【】$+=〝〟″‹›«»・⟨⟩،「」『』؟') DEFAULT_QUOTE_MARKERS = list("“„\"”〝〟″「」『』‚ʻʿ‘′'") diff --git a/montreal_forced_aligner/dictionary/multispeaker.py b/montreal_forced_aligner/dictionary/multispeaker.py index d6c756b5..e2476b8b 100644 --- a/montreal_forced_aligner/dictionary/multispeaker.py +++ b/montreal_forced_aligner/dictionary/multispeaker.py @@ -160,6 +160,8 @@ def load_phone_groups(self) -> None: if isinstance(self._phone_groups, list): self._phone_groups = {k: v for k, v in enumerate(self._phone_groups)} for k, v in self._phone_groups.items(): + if not v: + continue self._phone_groups[k] = sorted( set(x for x in v if x in self.non_silence_phones) ) diff --git a/tests/test_commandline_align.py b/tests/test_commandline_align.py index 18e80cfb..f69e3219 100644 --- a/tests/test_commandline_align.py +++ b/tests/test_commandline_align.py @@ -671,6 +671,8 @@ def test_swedish_cv( 
swedish_cv_dictionary, swedish_cv_acoustic_model, output_dir, + "--language", + "swedish", "--config_path", basic_align_config_path, "-q",