-
Notifications
You must be signed in to change notification settings - Fork 251
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* CLI for whisperx and speechbrain transcription * Optimizations for training acoustic models * Switch to using miniforge for gha * Fixing adaptation for older models
- Loading branch information
1 parent
78e481d
commit 1f91bff
Showing
62 changed files
with
3,999 additions
and
1,453 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,18 +1,50 @@ | ||
#!/usr/bin/env python | ||
|
||
import argparse | ||
import os | ||
import shutil | ||
import subprocess | ||
import sys | ||
from importlib.util import find_spec | ||
|
||
anchor_found = find_spec("anchor") is not None | ||
if __name__ == "__main__":
    # Script entry point: update the MFA conda packages in the current
    # environment and, when --install_3p is given, install/update the optional
    # third-party transcription packages (PyTorch via conda, then WhisperX and
    # SpeechBrain via pip).
    parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        "--install_3p",
        action="store_true",
        help="Install/update third party dependencies (Speechbrain and WhisperX)",
    )
    args = parser.parse_args()

    # find_spec only checks whether a package is importable; it does not
    # import it.
    anchor_found = find_spec("anchor") is not None
    speechbrain_found = find_spec("speechbrain") is not None
    whisperx_found = find_spec("whisperx") is not None

    conda_path = shutil.which("conda")
    if conda_path is None:
        print("Please install conda before running this command.")
        sys.exit(1)

    mamba_path = shutil.which("mamba")
    if mamba_path is None:
        print("No mamba found, installing first...")
        subprocess.call(
            [conda_path, "install", "-c", "conda-forge", "-y", "mamba"], env=os.environ
        )
        # Look mamba up again now that it should be installed; without this,
        # None would be passed as the executable to the calls below.
        mamba_path = shutil.which("mamba")
        if mamba_path is None:
            # The same sub-commands and flags are passed to conda, so keep
            # going with conda rather than aborting.
            print("Could not find mamba after installation, falling back to conda.")
            mamba_path = conda_path

    package_list = ["montreal-forced-aligner", "kalpy", "kaldi=*=cpu*"]
    if anchor_found:
        # Only update Anchor when it is already installed.
        package_list.append("anchor-annotator")
    subprocess.call(
        [mamba_path, "update", "-c", "conda-forge", "-y"] + package_list, env=os.environ
    )

    if args.install_3p:
        channels = ["conda-forge", "pytorch", "nvidia", "anaconda"]
        package_list = ["pytorch", "torchaudio"]
        if not whisperx_found:
            # Extra conda packages are requested only when whisperx is not
            # yet importable.
            package_list.extend(["cudnn=8", "transformers"])
        command = [mamba_path, "install", "-y"]
        for c in channels:
            command.extend(["-c", c])
        command += package_list
        subprocess.call(command, env=os.environ)

        # The package names must be part of the pip command line; previously
        # they were defined but never appended, so pip received no requirements.
        package_list = ["whisperx", "speechbrain", "pygtrie"]
        command = ["pip", "install", "-U"] + package_list
        subprocess.call(command, env=os.environ)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
|
||
.. _changelog_3.1: | ||
|
||
************* | ||
3.1 Changelog | ||
************* | ||
|
||
3.1.4 | ||
----- | ||
|
||
- Optimized :code:`mfa g2p` to better use multiple processes | ||
- Added :code:`--export_scores` to :code:`mfa g2p` for adding a column representing the final weights of the generated pronunciations | ||
- Added :code:`--output_directory` to :code:`mfa validate` to save generated validation files there rather than in the temporary directory | |
- Fixed a bug in cutoff modeling that was preventing cutoffs from being properly parsed | |
|
||
3.1.3 | ||
----- | ||
|
||
- Fixed an issue where silence probability being zero was not correctly removing silence | ||
- Compatibility with kalpy v0.6.5 | ||
- Added API functionality for verifying transcripts with interjection words in alignment | ||
- Fixed an error in fine tuning that generated nonsensical boundaries | ||
|
||
3.1.2 | ||
----- | ||
|
||
- Fixed a bug where hidden files and folders would be parsed as corpus data | ||
- Fixed a bug where validation would not respect :code:`--no_final_clean` | ||
- Fixed a rare crash in training when a job would not have utterances assigned to it | ||
- Fixed a bug where MFA would mistakenly report that dictionary and acoustic model phones did not match for older versions | |
|
||
3.1.1 | ||
----- | ||
|
||
- Fixed an issue with TextGrids missing intervals | ||
|
||
3.1.0 | ||
----- | ||
|
||
- Fixed a bug where cutoffs were not properly modelled | ||
- Added additional filter on create subset to not include utterances with cutoffs in smaller subsets | ||
- Added the ability to specify HMM topologies for phones | ||
- Fixed issues caused by validators not cleaning up temporary files and databases | ||
- Added support for default and nonnative dictionaries generated from other dictionaries | ||
- Restricted initial training rounds to exclude default and nonnative dictionaries | ||
- Changed clustering of phones to not mix silence and non-silence phones | ||
- Optimized textgrid export | ||
- Added better memory management for collecting alignments |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
|
||
.. _changelog_3.2: | ||
|
||
************* | ||
3.2 Changelog | ||
************* | ||
|
||
3.2.0 | ||
----- | ||
|
||
- Added :code:`--subset_word_count` parameter to :ref:`train_acoustic_model` to add a minimum word count for an utterance to be included in training subsets | ||
- Added :code:`--minimum_utterance_length` parameter to :ref:`train_acoustic_model` to add a minimum word count for an utterance to be included in training at all | ||
- Improved memory usage in compiling training graphs for initial subsets | ||
- Added support for transcription via whisperx and speechbrain models | |
- Updated text normalization to normalize to decomposed forms | |
- Compatibility with Kalpy 0.6.7 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
channels: | ||
- conda-forge | ||
dependencies: | ||
- python>=3.8 | ||
- numpy | ||
- librosa | ||
- pysoundfile | ||
- tqdm | ||
- requests | ||
- pyyaml | ||
- dataclassy | ||
- kaldi=*=*cpu* | ||
- scipy | ||
- pynini | ||
- openfst=1.8.3 | ||
- scikit-learn<1.3 | ||
- hdbscan | ||
- baumwelch | ||
- ngram | ||
- praatio=6.0.0 | ||
- biopython=1.79 | ||
- sqlalchemy>=2.0 | ||
- pgvector | ||
- pgvector-python | ||
- sqlite | ||
- postgresql | ||
- psycopg2 | ||
- click | ||
- setuptools_scm | ||
- pytest | ||
- pytest-mypy | ||
- pytest-cov | ||
- pytest-timeout | ||
- mock | ||
- coverage | ||
- coveralls | ||
- interrogate | ||
- kneed | ||
- matplotlib | ||
- seaborn | ||
- rich | ||
- rich-click | ||
- kalpy |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.