support playht3.0 languages (#357)

* support playht3.0 languages * update speech utils version
jambonz · Oct 16, 2024 · de250c8 · de250c8
1 parent 84d83a0
commit de250c8
Show file tree

Hide file tree

Showing 5 changed files with 206 additions and 32 deletions.
diff --git a/lib/routes/api/speech-credentials.js b/lib/routes/api/speech-credentials.js
@@ -875,15 +875,15 @@ router.get('/:sid/test', async(req, res) => {
 router.get('/speech/supportedLanguagesAndVoices', async(req, res) => {
   const {logger, getTtsVoices} = req.app.locals;
   try {
-    const {vendor, label} = req.query;
+    const {vendor, label, create_new} = req.query;
     if (!vendor) {
       throw new DbErrorBadRequest('vendor is required');
     }
     const account_sid = req.user.account_sid || req.body.account_sid;
     const service_provider_sid = req.user.service_provider_sid ||
       req.body.service_provider_sid || parseServiceProviderSid(req);
 
-    const credentials = await SpeechCredential.getSpeechCredentialsByVendorAndLabel(
+    const credentials = create_new ? null : await SpeechCredential.getSpeechCredentialsByVendorAndLabel(
       service_provider_sid, account_sid, vendor, label);
     const tmp = credentials && credentials.length > 0 ? credentials[0] : null;
     const cred = tmp ? JSON.parse(decrypt(tmp.credential)) : null;

diff --git a/lib/utils/speech-data/tts-languages-playht.js b/lib/utils/speech-data/tts-languages-playht.js
@@ -0,0 +1,152 @@
+// tts-languages-playht.js: language list used to build the PlayHT Play3.0 TTS payload
+
+module.exports = [
+  {
+    name: 'English',
+    value: 'english'
+  },
+  {
+    name: 'Mandarin',
+    value: 'mandarin'
+  },
+  {
+    name: 'Hindi',
+    value: 'hindi'
+  },
+  {
+    name: 'Japanese',
+    value: 'japanese'
+  },
+  {
+    name: 'Korean',
+    value: 'korean'
+  },
+  {
+    name: 'Arabic',
+    value: 'arabic'
+  },
+  {
+    name: 'Spanish',
+    value: 'spanish'
+  },
+  {
+    name: 'French',
+    value: 'french'
+  },
+  {
+    name: 'Italian',
+    value: 'italian'
+  },
+  {
+    name: 'Portuguese',
+    value: 'portuguese'
+  },
+  {
+    name: 'German',
+    value: 'german'
+  },
+  {
+    name: 'Dutch',
+    value: 'dutch'
+  },
+  {
+    name: 'Swedish',
+    value: 'swedish'
+  },
+  {
+    name: 'Czech',
+    value: 'czech'
+  },
+  {
+    name: 'Polish',
+    value: 'polish'
+  },
+  {
+    name: 'Russian',
+    value: 'russian'
+  },
+  {
+    name: 'Bulgarian',
+    value: 'bulgarian'
+  },
+  {
+    name: 'Hebrew',
+    value: 'hebrew'
+  },
+  {
+    name: 'Greek',
+    value: 'greek'
+  },
+  {
+    name: 'Turkish',
+    value: 'turkish'
+  },
+  {
+    name: 'Afrikaans',
+    value: 'afrikaans'
+  },
+  {
+    name: 'Xhosa',
+    value: 'xhosa'
+  },
+  {
+    name: 'Tagalog',
+    value: 'tagalog'
+  },
+  {
+    name: 'Malay',
+    value: 'malay'
+  },
+  {
+    name: 'Indonesian',
+    value: 'indonesian'
+  },
+  {
+    name: 'Bengali',
+    value: 'bengali'
+  },
+  {
+    name: 'Serbian',
+    value: 'serbian'
+  },
+  {
+    name: 'Thai',
+    value: 'thai'
+  },
+  {
+    name: 'Urdu',
+    value: 'urdu'
+  },
+  {
+    name: 'Croatian',
+    value: 'croatian'
+  },
+  {
+    name: 'Hungarian',
+    value: 'hungarian'
+  },
+  {
+    name: 'Danish',
+    value: 'danish'
+  },
+  {
+    name: 'Amharic',
+    value: 'amharic'
+  },
+  {
+    name: 'Albanian',
+    value: 'albanian'
+  },
+  {
+    name: 'Catalan',
+    value: 'catalan'
+  },
+  {
+    name: 'Ukrainian',
+    value: 'ukrainian'
+  },
+  {
+    name: 'Galician',
+    value: 'galician'
+  }
+];
diff --git a/lib/utils/speech-utils.js b/lib/utils/speech-utils.js
@@ -25,6 +25,7 @@ const TtsModelDeepgram = require('./speech-data/tts-model-deepgram');
 const TtsModelElevenLabs = require('./speech-data/tts-model-elevenlabs');
 const TtsModelWhisper = require('./speech-data/tts-model-whisper');
 const TtsModelPlayHT = require('./speech-data/tts-model-playht');
+const ttsLanguagesPlayHt = require('./speech-data/tts-languages-playht');
 const TtsModelRimelabs = require('./speech-data/tts-model-rimelabs');
 
 const SttGoogleLanguagesVoices = require('./speech-data/stt-google');
@@ -40,6 +41,7 @@ const SttSpeechmaticsLanguagesVoices = require('./speech-data/stt-speechmatics')
 const SttAssemblyaiLanguagesVoices = require('./speech-data/stt-assemblyai');
 const SttVerbioLanguagesVoices = require('./speech-data/stt-verbio');
 
+
 const testSonioxStt = async(logger, credentials) => {
   const api_key = credentials;
   const soniox = new SpeechClient(api_key);
@@ -869,45 +871,65 @@ const fetchLayHTVoices = async(credential) => {
 
 async function getLanguagesVoicesForPlayHT(credential) {
   if (credential) {
+    const {voice_engine} = credential;
     const [cloned_voice, voices] = await fetchLayHTVoices(credential);
     const list_voices = [...cloned_voice, ...voices];
 
     const buildVoice = (d) => {
       let name = `${d.name} -${concat(d.accent)}${concat(d.age)}${concat(d.gender)}${concat(d.loudness)}` +
       `${concat(d.style)}${concat(d.tempo)}${concat(d.texture)}` ;
       name = name.endsWith(',') ? name.trim().slice(0, -1) : name;
+      name += !d.language_code ? ' - Custom Voice' : '';
+
       return {
         value: `${d.id}`,
         name
       };
     };
 
-    const ttsVoices = list_voices.reduce((acc, voice) => {
-      // Play3.0 support all voice for PlayHT2.0*
-      const filteredVoiceEngine = credential.voice_engine === 'Play3.0' ?
-        `${credential.voice_engine}_PlayHT2.0_PlayHT2.0-turbo` : credential.voice_engine;
-      if (!filteredVoiceEngine.includes(voice.voice_engine)) {
+    const buildPlay30Payload = () => {
+      // PlayHT3.0 can play different languages with different voices.
+      // All voices are added to the English language by default; other languages get their voices from English.
+      const ttsVoices = ttsLanguagesPlayHt.map((l) => ({
+        ...l,
+        voices: l.value === 'english' ? list_voices.map((v) => buildVoice(v)) : []
+      }));
+      return tranform(ttsVoices, undefined, TtsModelPlayHT);
+    };
+
+    const buildPayload = () => {
+      const ttsVoices = list_voices.reduce((acc, voice) => {
+        if (!voice_engine.includes(voice.voice_engine)) {
+          return acc;
+        }
+        const languageCode = voice.language_code;
+        // custom voice does not have language code
+        if (!languageCode) {
+          voice.language_code = 'en';
+          voice.language = 'Custom-English';
+        }
+        const existingLanguage = acc.find((lang) => lang.value === languageCode);
+        if (existingLanguage) {
+          existingLanguage.voices.push(buildVoice(voice));
+        } else {
+          acc.push({
+            value: voice.language_code,
+            name: voice.language,
+            voices: [buildVoice(voice)]
+          });
+        }
         return acc;
-      }
-      const languageCode = voice.language_code;
-      // custom voice does not have language code
-      if (!languageCode) {
-        voice.language_code = 'en';
-        voice.language = 'Custom-English';
-      }
-      const existingLanguage = acc.find((lang) => lang.value === languageCode);
-      if (existingLanguage) {
-        existingLanguage.voices.push(buildVoice(voice));
-      } else {
-        acc.push({
-          value: voice.language_code,
-          name: voice.language,
-          voices: [buildVoice(voice)]
-        });
-      }
-      return acc;
-    }, []);
-    return tranform(ttsVoices, undefined, TtsModelPlayHT);
+      }, []);
+      return tranform(ttsVoices, undefined, TtsModelPlayHT);
+    };
+
+    switch (voice_engine) {
+      case 'Play3.0':
+        return buildPlay30Payload();
+
+      default:
+        return buildPayload();
+    }
   }
   return tranform(TtsPlayHtLanguagesVoices, undefined, TtsModelPlayHT);
 }

diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -29,7 +29,7 @@
     "@jambonz/lamejs": "^1.2.2",
     "@jambonz/mw-registrar": "^0.2.7",
     "@jambonz/realtimedb-helpers": "^0.8.10",
-    "@jambonz/speech-utils": "^0.1.18",
+    "@jambonz/speech-utils": "^0.1.19",
     "@jambonz/time-series": "^0.2.8",
     "@jambonz/verb-specifications": "^0.0.72",
     "@soniox/soniox-node": "^1.2.2",