Skip to content

Commit

Permalink
support playht3.0 languages (#357)
Browse files Browse the repository at this point in the history
* support playht3.0 languages

* update speech utils version
  • Loading branch information
xquanluu authored Oct 16, 2024
1 parent 84d83a0 commit de250c8
Show file tree
Hide file tree
Showing 5 changed files with 206 additions and 32 deletions.
4 changes: 2 additions & 2 deletions lib/routes/api/speech-credentials.js
Original file line number Diff line number Diff line change
Expand Up @@ -875,15 +875,15 @@ router.get('/:sid/test', async(req, res) => {
router.get('/speech/supportedLanguagesAndVoices', async(req, res) => {
const {logger, getTtsVoices} = req.app.locals;
try {
const {vendor, label} = req.query;
const {vendor, label, create_new} = req.query;
if (!vendor) {
throw new DbErrorBadRequest('vendor is required');
}
const account_sid = req.user.account_sid || req.body.account_sid;
const service_provider_sid = req.user.service_provider_sid ||
req.body.service_provider_sid || parseServiceProviderSid(req);

const credentials = await SpeechCredential.getSpeechCredentialsByVendorAndLabel(
const credentials = create_new ? null : await SpeechCredential.getSpeechCredentialsByVendorAndLabel(
service_provider_sid, account_sid, vendor, label);
const tmp = credentials && credentials.length > 0 ? credentials[0] : null;
const cred = tmp ? JSON.parse(decrypt(tmp.credential)) : null;
Expand Down
152 changes: 152 additions & 0 deletions lib/utils/speech-data/tts-languages-playht.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
// languages.js

module.exports = [
{
name: 'English',
value: 'english'
},
{
name: 'Mandarin',
value: 'mandarin'
},
{
name: 'Hindi',
value: 'hindi'
},
{
name: 'Japanese',
value: 'japanese'
},
{
name: 'Korean',
value: 'korean'
},
{
name: 'Arabic',
value: 'arabic'
},
{
name: 'Spanish',
value: 'spanish'
},
{
name: 'French',
value: 'french'
},
{
name: 'Italian',
value: 'italian'
},
{
name: 'Portuguese',
value: 'portuguese'
},
{
name: 'German',
value: 'german'
},
{
name: 'Dutch',
value: 'dutch'
},
{
name: 'Swedish',
value: 'swedish'
},
{
name: 'Czech',
value: 'czech'
},
{
name: 'Polish',
value: 'polish'
},
{
name: 'Russian',
value: 'russian'
},
{
name: 'Bulgarian',
value: 'bulgarian'
},
{
name: 'Hebrew',
value: 'hebrew'
},
{
name: 'Greek',
value: 'greek'
},
{
name: 'Turkish',
value: 'turkish'
},
{
name: 'Afrikaans',
value: 'afrikaans'
},
{
name: 'Xhosa',
value: 'xhosa'
},
{
name: 'Tagalog',
value: 'tagalog'
},
{
name: 'Malay',
value: 'malay'
},
{
name: 'Indonesian',
value: 'indonesian'
},
{
name: 'Bengali',
value: 'bengali'
},
{
name: 'Serbian',
value: 'serbian'
},
{
name: 'Thai',
value: 'thai'
},
{
name: 'Urdu',
value: 'urdu'
},
{
name: 'Croatian',
value: 'croatian'
},
{
name: 'Hungarian',
value: 'hungarian'
},
{
name: 'Danish',
value: 'danish'
},
{
name: 'Amharic',
value: 'amharic'
},
{
name: 'Albanian',
value: 'albanian'
},
{
name: 'Catalan',
value: 'catalan'
},
{
name: 'Ukrainian',
value: 'ukrainian'
},
{
name: 'Galician',
value: 'galician'
}
];
72 changes: 47 additions & 25 deletions lib/utils/speech-utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ const TtsModelDeepgram = require('./speech-data/tts-model-deepgram');
const TtsModelElevenLabs = require('./speech-data/tts-model-elevenlabs');
const TtsModelWhisper = require('./speech-data/tts-model-whisper');
const TtsModelPlayHT = require('./speech-data/tts-model-playht');
const ttsLanguagesPlayHt = require('./speech-data/tts-languages-playht');
const TtsModelRimelabs = require('./speech-data/tts-model-rimelabs');

const SttGoogleLanguagesVoices = require('./speech-data/stt-google');
Expand All @@ -40,6 +41,7 @@ const SttSpeechmaticsLanguagesVoices = require('./speech-data/stt-speechmatics')
const SttAssemblyaiLanguagesVoices = require('./speech-data/stt-assemblyai');
const SttVerbioLanguagesVoices = require('./speech-data/stt-verbio');


const testSonioxStt = async(logger, credentials) => {
const api_key = credentials;
const soniox = new SpeechClient(api_key);
Expand Down Expand Up @@ -869,45 +871,65 @@ const fetchLayHTVoices = async(credential) => {

/**
 * Build the PlayHT TTS language/voice listing.
 *
 * With a credential, voices are fetched through fetchLayHTVoices() and grouped
 * into languages; without one, the static TtsPlayHtLanguagesVoices table is
 * used. In both cases the list is passed to tranform() together with
 * TtsModelPlayHT and that result is returned.
 *
 * NOTE(review): this listing is a diff hunk rendered without +/- markers, so
 * lines removed by the commit and lines added by it appear interleaved below
 * (the reduce() callback body shows up twice). Confirm against the committed
 * file before treating this text as a runnable function.
 *
 * @param {object} [credential] - PlayHT credential; its voice_engine field
 *   selects which payload builder runs.
 * @returns {Promise<*>} whatever tranform() returns.
 */
async function getLanguagesVoicesForPlayHT(credential) {
if (credential) {
const {voice_engine} = credential;
// fetchLayHTVoices resolves to a pair that is merged into one list;
// presumably cloned (custom) voices first, then stock voices — verify against fetchLayHTVoices.
const [cloned_voice, voices] = await fetchLayHTVoices(credential);
const list_voices = [...cloned_voice, ...voices];

// Turn one raw voice record into a {value, name} option.
// concat() is defined elsewhere; presumably it formats a single attribute
// with a trailing separator — verify.
const buildVoice = (d) => {
let name = `${d.name} -${concat(d.accent)}${concat(d.age)}${concat(d.gender)}${concat(d.loudness)}` +
`${concat(d.style)}${concat(d.tempo)}${concat(d.texture)}` ;
// drop a trailing comma if the assembled name ends with one
name = name.endsWith(',') ? name.trim().slice(0, -1) : name;
// voices without a language_code are labelled as custom voices
name += !d.language_code ? ' - Custom Voice' : '';

return {
value: `${d.id}`,
name
};
};

const ttsVoices = list_voices.reduce((acc, voice) => {
// Play3.0 supports all PlayHT2.0* voices: for 'Play3.0' the filter string also
// contains 'PlayHT2.0' and 'PlayHT2.0-turbo', so includes() matches those engines too.
const filteredVoiceEngine = credential.voice_engine === 'Play3.0' ?
`${credential.voice_engine}_PlayHT2.0_PlayHT2.0-turbo` : credential.voice_engine;
if (!filteredVoiceEngine.includes(voice.voice_engine)) {
const buildPlay30Payload = () => {
// PlayHT3.0 can play different languages with different voices.
// All voices are added to the English language by default; every other language
// gets an empty voice list here and is expected to take its voices from English.
const ttsVoices = ttsLanguagesPlayHt.map((l) => ({
...l,
voices: l.value === 'english' ? list_voices.map((v) => buildVoice(v)) : []
}));
return tranform(ttsVoices, undefined, TtsModelPlayHT);
};

const buildPayload = () => {
const ttsVoices = list_voices.reduce((acc, voice) => {
// skip voices whose engine does not appear in the credential's voice_engine
if (!voice_engine.includes(voice.voice_engine)) {
return acc;
}
const languageCode = voice.language_code;
// custom voice does not have language code
if (!languageCode) {
voice.language_code = 'en';
voice.language = 'Custom-English';
}
// NOTE(review): languageCode was captured before the fallback above, so for a
// custom voice this find() compares against undefined and each custom voice is
// pushed as a new 'en' entry — confirm whether that is intended.
const existingLanguage = acc.find((lang) => lang.value === languageCode);
if (existingLanguage) {
existingLanguage.voices.push(buildVoice(voice));
} else {
acc.push({
value: voice.language_code,
name: voice.language,
voices: [buildVoice(voice)]
});
}
return acc;
}
const languageCode = voice.language_code;
// custom voice does not have language code
if (!languageCode) {
voice.language_code = 'en';
voice.language = 'Custom-English';
}
const existingLanguage = acc.find((lang) => lang.value === languageCode);
if (existingLanguage) {
existingLanguage.voices.push(buildVoice(voice));
} else {
acc.push({
value: voice.language_code,
name: voice.language,
voices: [buildVoice(voice)]
});
}
return acc;
}, []);
return tranform(ttsVoices, undefined, TtsModelPlayHT);
}, []);
return tranform(ttsVoices, undefined, TtsModelPlayHT);
};

// 'Play3.0' uses the fixed language table; every other engine groups voices by language_code
switch (voice_engine) {
case 'Play3.0':
return buildPlay30Payload();

default:
return buildPayload();
}
}
// no credential: fall back to the static PlayHT language/voice table
return tranform(TtsPlayHtLanguagesVoices, undefined, TtsModelPlayHT);
}
Expand Down
8 changes: 4 additions & 4 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
"@jambonz/lamejs": "^1.2.2",
"@jambonz/mw-registrar": "^0.2.7",
"@jambonz/realtimedb-helpers": "^0.8.10",
"@jambonz/speech-utils": "^0.1.18",
"@jambonz/speech-utils": "^0.1.19",
"@jambonz/time-series": "^0.2.8",
"@jambonz/verb-specifications": "^0.0.72",
"@soniox/soniox-node": "^1.2.2",
Expand Down

0 comments on commit de250c8

Please sign in to comment.