From 4e470428d9456f42c6fe321e567f47a365dbb335 Mon Sep 17 00:00:00 2001
From: Quan HL
Date: Thu, 12 Oct 2023 15:43:50 +0700
Subject: [PATCH] add eleven labs

---
Review notes (ignored by git am):
 - The elevenlabs branch of GET /:sid/test now calls testElevenlabs; it
   previously called testWellSaidTts.
 - Indentation of this patch was reconstructed from a whitespace-collapsed
   copy; verify the context lines against the target files before applying
   (or apply with --ignore-whitespace).
 - The PUT /:sid hunk only adds a trailing comma and does not read model_id
   from req.body; confirm that updating an elevenlabs credential still
   satisfies the model_id assertion in encryptCredential.

 lib/routes/api/speech-credentials.js | 29 +++++++++++++++++++++++++++--
 lib/utils/speech-utils.js            | 33 ++++++++++++++++++++++++++++++++-
 test/speech-credentials.js           | 23 +++++++++++++++++++++++
 3 files changed, 82 insertions(+), 3 deletions(-)

diff --git a/lib/routes/api/speech-credentials.js b/lib/routes/api/speech-credentials.js
index fd9d45e1..70f08c24 100644
--- a/lib/routes/api/speech-credentials.js
+++ b/lib/routes/api/speech-credentials.js
@@ -127,7 +127,8 @@ const encryptCredential = (obj) => {
     custom_stt_url,
     custom_tts_url,
     auth_token = '',
-    cobalt_server_uri
+    cobalt_server_uri,
+    model_id
   } = obj;
 
   switch (vendor) {
@@ -202,6 +203,12 @@ const encryptCredential = (obj) => {
       const cobaltData = JSON.stringify({cobalt_server_uri});
       return encrypt(cobaltData);
 
+    case 'elevenlabs':
+      assert(api_key, 'invalid elevenLabs speech credential: api_key is required');
+      assert(model_id, 'invalid elevenLabs speech credential: model_id is required');
+      const elevenlabsData = JSON.stringify({api_key, model_id});
+      return encrypt(elevenlabsData);
+
     default:
       if (vendor.startsWith('custom:')) {
         const customData = JSON.stringify({auth_token, custom_stt_url, custom_tts_url});
@@ -317,6 +324,10 @@ function decryptCredential(obj, credential, logger) {
   } else if ('soniox' === obj.vendor) {
     const o = JSON.parse(decrypt(credential));
     obj.api_key = obscureKey(o.api_key);
+  } else if ('elevenlabs' === obj.vendor) {
+    const o = JSON.parse(decrypt(credential));
+    obj.api_key = obscureKey(o.api_key);
+    obj.model_id = o.model_id;
   } else if (obj.vendor.startsWith('custom:')) {
     const o = JSON.parse(decrypt(credential));
     obj.auth_token = obscureKey(o.auth_token);
@@ -454,7 +465,7 @@ router.put('/:sid', async(req, res) => {
       custom_stt_endpoint_url,
       custom_stt_url,
       custom_tts_url,
-      cobalt_server_uri
+      cobalt_server_uri,
     } = req.body;
 
     const newCred = {
@@ -732,6 +743,20 @@ router.get('/:sid/test', async(req, res) => {
           SpeechCredential.sttTestResult(sid, false);
         }
       }
+    } else if (cred.vendor === 'elevenlabs') {
+      const {api_key, model_id} = credential;
+      if (cred.use_for_tts) {
+        try {
+          /* required here because this change does not touch the file's top-level imports */
+          const {testElevenlabs} = require('../../utils/speech-utils');
+          await testElevenlabs(logger, {api_key, model_id});
+          results.tts.status = 'ok';
+          SpeechCredential.ttsTestResult(sid, true);
+        } catch (err) {
+          results.tts = {status: 'fail', reason: err.message};
+          SpeechCredential.ttsTestResult(sid, false);
+        }
+      }
     }
 
     res.status(200).json(results);
diff --git a/lib/utils/speech-utils.js b/lib/utils/speech-utils.js
index 0c6f8c28..31ca5ef7 100644
--- a/lib/utils/speech-utils.js
+++ b/lib/utils/speech-utils.js
@@ -203,6 +203,36 @@ const testWellSaidTts = async(logger, credentials) => {
   }
 };
 
+/**
+ * Validate an ElevenLabs credential by requesting a short synthesis ('Hello').
+ * @param {object} logger - used to log a failed request via logger.info
+ * @param {object} credentials - {api_key, model_id}
+ * @returns {Promise<Buffer>} the response body, requested as a buffer
+ * @throws the request error, rethrown after it is logged
+ */
+const testElevenlabs = async(logger, credentials) => {
+  const {api_key, model_id} = credentials;
+  try {
+    const post = bent('https://api.elevenlabs.io', 'POST', 'buffer', {
+      'xi-api-key': api_key,
+      'Accept': 'audio/mpeg',
+      'Content-Type': 'application/json'
+    });
+    const mp3 = await post('/v1/text-to-speech/21m00Tcm4TlvDq8ikWAM', {
+      text: 'Hello',
+      model_id,
+      voice_settings: {
+        stability: 0.5,
+        similarity_boost: 0.5
+      }
+    });
+    return mp3;
+  } catch (err) {
+    logger.info({err}, 'testElevenlabs returned error');
+    throw err;
+  }
+};
+
 const testIbmTts = async(logger, getTtsVoices, credentials) => {
   const {tts_api_key, tts_region} = credentials;
   const voices = await getTtsVoices({vendor: 'ibm', credentials: {tts_api_key, tts_region}});
@@ -251,5 +281,6 @@ module.exports = {
   testDeepgramStt,
   testIbmTts,
   testIbmStt,
-  testSonioxStt
+  testSonioxStt,
+  testElevenlabs
 };
diff --git a/test/speech-credentials.js b/test/speech-credentials.js
index 19fe50b9..b2b54490 100644
--- a/test/speech-credentials.js
+++ b/test/speech-credentials.js
@@ -469,6 +469,29 @@ test('speech credentials tests', async(t) => {
     });
     t.ok(result.statusCode === 204, 'successfully deleted speech credential for Cobalt');
 
+    /* add a credential for elevenlabs */
+    result = await request.post(`/Accounts/${account_sid}/SpeechCredentials`, {
+      resolveWithFullResponse: true,
+      auth: authUser,
+      json: true,
+      body: {
+        vendor: 'elevenlabs',
+        use_for_stt: true,
+        use_for_tts: false,
+        api_key: 'asdasdasdasddsadasda',
+        model_id: 'eleven_multilingual_v2'
+      }
+    });
+    t.ok(result.statusCode === 201, 'successfully added speech credential for elevenlabs');
+    const elevenlabs_sid = result.body.sid;
+
+    /* delete the credential */
+    result = await request.delete(`/Accounts/${account_sid}/SpeechCredentials/${elevenlabs_sid}`, {
+      auth: authUser,
+      resolveWithFullResponse: true,
+    });
+    t.ok(result.statusCode === 204, 'successfully deleted speech credential for elevenlabs');
+
     await deleteObjectBySid(request, '/Accounts', account_sid);
     await deleteObjectBySid(request, '/ServiceProviders', service_provider_sid);
     t.end();