Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

C APIの改善を行う #217

Merged
merged 31 commits into from
Aug 26, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
0429477
C APIの改善を行う
qwerty2501 Aug 6, 2022
fce9290
Merge branch 'main' of https://github.com/VOICEVOX/voicevox_core into…
qwerty2501 Aug 19, 2022
60bc086
C API定義を修正した
qwerty2501 Aug 20, 2022
af7a3fe
voicevox_core側の実装も変更した
qwerty2501 Aug 20, 2022
466fdd9
cpu_num_threadsをu16に変更した
qwerty2501 Aug 21, 2022
b5635cf
helper関数を分離した
qwerty2501 Aug 21, 2022
43e2c51
SynthesisOptionsは不要そうだったので削除
qwerty2501 Aug 21, 2022
b5cd93e
SynthesisOptionsを追加
qwerty2501 Aug 21, 2022
2ece206
decode_forward -> decodeに変更した
qwerty2501 Aug 21, 2022
3a80b7e
yukarin_sa_forward -> predict_intonation に変更した
qwerty2501 Aug 21, 2022
e3a3469
yukarin_s_forward -> predict_durationに変更
qwerty2501 Aug 21, 2022
317d432
とりあえずコンパイル・テストが通るようにした
qwerty2501 Aug 22, 2022
911fdd6
clippyのエラーを修正した
qwerty2501 Aug 22, 2022
0a5db30
内部のyukarin_s,yukarin_saについて名前を修正
qwerty2501 Aug 22, 2022
5fe6665
decode_forwardとなっていたところをdecodeに修正した
qwerty2501 Aug 22, 2022
b625954
unixのテストが通るように修正
qwerty2501 Aug 22, 2022
01d6eca
API定義とそこまで関係ない実装はhelperに移動させた
qwerty2501 Aug 22, 2022
7f04348
VoicevoxInitializeOptionsをコメントを元に修正した
qwerty2501 Aug 22, 2022
d92c0e4
TtsOptionsの変換をFrom traitで行うようにした
qwerty2501 Aug 22, 2022
5892187
疑問文モードを実装した
qwerty2501 Aug 22, 2022
3f53f07
voicevox_default_synthesis_optionsを追加、enable_interrogative_upspeakがデフ…
qwerty2501 Aug 22, 2022
a489c4a
Merge branch 'main' of https://github.com/VOICEVOX/voicevox_core into…
qwerty2501 Aug 22, 2022
3e1136f
kanaは defaultとするようにした
qwerty2501 Aug 22, 2022
f697e09
use_gpu -> AccelerationModeに変更した
qwerty2501 Aug 23, 2022
07bc35b
GPU使用中かどうか判定するAPI関数を追加した
qwerty2501 Aug 23, 2022
d1e2c4b
defaultの記述を新しい形にした
qwerty2501 Aug 23, 2022
7e50250
Auto=0とした
qwerty2501 Aug 23, 2022
f6e4d9b
is_gpu_modeに名称変更
qwerty2501 Aug 25, 2022
a9c2cee
defaultオプション生成関数にmakeをつけた 
qwerty2501 Aug 25, 2022
a599e17
output_binary_size->output_wav_sizeに変更 型をc_int->usizeに変更した
qwerty2501 Aug 26, 2022
d5af118
speaker_idをu32に変更した
qwerty2501 Aug 26, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion crates/voicevox_core/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ pub enum Error {
}

fn base_error_message(result_code: VoicevoxResultCode) -> &'static str {
let c_message: &'static str = crate::voicevox_error_result_to_message(result_code);
let c_message: &'static str = crate::error_result_to_message(result_code);
&c_message[..(c_message.len() - 1)]
}

Expand Down
92 changes: 28 additions & 64 deletions crates/voicevox_core/src/publish.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ use onnxruntime::{
session::{AnyArray, NdArray},
};
use result_code::VoicevoxResultCode;
use std::collections::BTreeMap;
use std::ffi::CStr;
use std::sync::Mutex;
use std::{collections::BTreeMap, path::PathBuf};

use status::*;
use std::ffi::CString;
Expand All @@ -32,12 +32,8 @@ impl VoicevoxCore {
})
}

pub fn initialize(
&mut self,
use_gpu: bool,
cpu_num_threads: usize,
load_all_models: bool,
) -> Result<()> {
pub fn initialize(&mut self, options: InitializeOptions) -> Result<()> {
unimplemented!();
self.synthesis_engine.inference_core_mut().initialize(
use_gpu,
cpu_num_threads,
Expand All @@ -61,11 +57,11 @@ impl VoicevoxCore {
self.synthesis_engine.inference_core_mut().finalize()
}

pub fn metas(&self) -> &'static CStr {
pub fn get_metas_json(&self) -> &'static CStr {
&METAS_CSTRING
}

pub fn supported_devices(&self) -> &'static CStr {
pub fn get_supported_devices_json(&self) -> &'static CStr {
&SUPPORTED_DEVICES_CSTRING
}

Expand Down Expand Up @@ -122,15 +118,7 @@ impl VoicevoxCore {
)
}

pub fn voicevox_load_openjtalk_dict(&mut self, dict_path: &str) -> Result<()> {
self.synthesis_engine.load_openjtalk_dict(dict_path)
}

pub fn voicevox_audio_query(
&mut self,
text: &str,
speaker_id: usize,
) -> Result<AudioQueryModel> {
pub fn audio_query(&mut self, text: &str, speaker_id: usize) -> Result<AudioQueryModel> {
if !self.synthesis_engine.is_openjtalk_dict_loaded() {
return Err(Error::NotLoadedOpenjtalkDict);
}
Expand All @@ -152,48 +140,36 @@ impl VoicevoxCore {
))
}

pub fn voicevox_audio_query_from_kana(
&mut self,
text: &str,
speaker_id: usize,
) -> Result<AudioQueryModel> {
let accent_phrases = parse_kana(text)?;
let accent_phrases = self
.synthesis_engine
.replace_mora_data(&accent_phrases, speaker_id)?;

Ok(AudioQueryModel::new(
accent_phrases,
1.,
0.,
1.,
1.,
0.1,
0.1,
SynthesisEngine::DEFAULT_SAMPLING_RATE,
false,
"".into(),
))
}

pub fn voicevox_synthesis(
pub fn synthesis(
&mut self,
audio_query: &AudioQueryModel,
speaker_id: usize,
options: SynthesisOptions,
) -> Result<Vec<u8>> {
unimplemented!();
self.synthesis_engine
.synthesis_wave_format(audio_query, speaker_id, true) // TODO: 疑問文化を設定可能にする
}

pub fn voicevox_tts(&mut self, text: &str, speaker_id: usize) -> Result<Vec<u8>> {
let audio_query = &self.voicevox_audio_query(text, speaker_id)?;
self.voicevox_synthesis(audio_query, speaker_id)
pub fn tts(&mut self, text: &str, speaker_id: usize, options: TtsOptions) -> Result<Vec<u8>> {
let audio_query = &self.audio_query(text, speaker_id)?;
self.synthesis(audio_query, speaker_id)
}
}

pub fn voicevox_tts_from_kana(&mut self, text: &str, speaker_id: usize) -> Result<Vec<u8>> {
let audio_query = &self.voicevox_audio_query_from_kana(text, speaker_id)?;
self.voicevox_synthesis(audio_query, speaker_id)
}
pub struct InitializeOptions {
use_cuda: bool,
cpu_num_threads: u32,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

負数を入れる余地を作りたくなかったのでu32にしたがそれでよかったか? usizeだとちょっと大きすぎる気がしたので
内部的には c_intを使ってるので絶対に桁落ちさせないという意味ではu16のほうがよかったかもしれない

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

usize

どうなんでしょう? Python越しにせいぜい数回やりとりするデータなのでサイズはあまり問題ではないとは思うのですが、桁落ちも確かにありますね。

Copy link
Contributor Author

@qwerty2501 qwerty2501 Aug 20, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

内部的にはc_intなので usize -> c_intへの変換の際に桁落ちはありえますね。変換前にチェックして値が大きすぎたらエラーにする方法もまあなくはないですが、そもそもCPUの数でそこまで大きな値を指定することはないと考えるとusizeは冗長かもですね。
変換前に桁落ちしないかチェックが必要なのはu32も同じなので、そういう意味だとu16がベストかも?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

話はずれますがrepr(Rust)な方の構造体ではOption<NonZeroU32>のように持つのもいいかもしれません。プリミティブな整数型に一対一で対応しますし。

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ですね。u16がいいかも。

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Optionについてはどうでしょうね。onnxruntimeではcpu_num_threads:0も意味のある値ではあるのでそのままu16としても良いかもしれませんが、onnxruntimeに依存しないという意味では Option<NonZeroU16> としてもいいかもしれないです。

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

あーそうでしたね。onnxruntimeにしか渡さないのならあまり意味はないかも。DLLを使うユーザーに提示できるならともかく。

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cpu_num_threadsをu16に変更しました

load_all_models: bool,
open_jtalk_dict_dir: Option<PathBuf>,
PickledChair marked this conversation as resolved.
Show resolved Hide resolved
}

pub struct SynthesisOptions {
kana: bool,
}

pub struct TtsOptions {
kana: bool,
}

#[derive(new)]
Expand Down Expand Up @@ -506,7 +482,7 @@ fn get_model_index_and_speaker_id(speaker_id: usize) -> Option<(usize, usize)> {
SPEAKER_ID_MAP.get(&speaker_id).copied()
}

pub const fn voicevox_error_result_to_message(result_code: VoicevoxResultCode) -> &'static str {
pub const fn error_result_to_message(result_code: VoicevoxResultCode) -> &'static str {
// C APIのため、messageには必ず末尾にNULL文字を追加する
use VoicevoxResultCode::*;
match result_code {
Expand Down Expand Up @@ -635,7 +611,7 @@ mod tests {
#[rstest]
fn supported_devices_works() {
let internal = VoicevoxCore::new_with_mutex();
let cstr_result = internal.lock().unwrap().supported_devices();
let cstr_result = internal.lock().unwrap().get_supported_devices_json();
assert!(cstr_result.to_str().is_ok(), "{:?}", cstr_result);

let json_result: std::result::Result<SupportedDevices, _> =
Expand Down Expand Up @@ -736,16 +712,4 @@ mod tests {
assert!(result.is_ok(), "{:?}", result);
assert_eq!(result.unwrap().len(), F0_LENGTH * 256);
}

#[rstest]
#[async_std::test]
async fn voicevox_load_openjtalk_dict_works() {
let internal = VoicevoxCore::new_with_mutex();
let open_jtalk_dic_dir = download_open_jtalk_dict_if_no_exists().await;
let result = internal
.lock()
.unwrap()
.voicevox_load_openjtalk_dict(open_jtalk_dic_dir.to_str().unwrap());
assert_eq!(result, Ok(()));
}
}
Loading