Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add eleven labs #240

Merged
merged 3 commits into from
Oct 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion lib/models/speech-credential.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ class SpeechCredential extends Model {
return rows;
}

static async isAvailableVendorAndLabel(service_provider_sid, account_sid, vendor, label) {
static async getSpeechCredentialsByVendorAndLabel(service_provider_sid, account_sid, vendor, label) {
let sql;
if (account_sid) {
sql = 'SELECT * FROM speech_credentials WHERE account_sid = ? AND vendor = ? AND label = ?';
Expand Down
110 changes: 106 additions & 4 deletions lib/routes/api/speech-credentials.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,10 @@ const {
testDeepgramStt,
testSonioxStt,
testIbmTts,
testIbmStt
testIbmStt,
testElevenlabs
} = require('../../utils/speech-utils');
const bent = require('bent');
const {promisePool} = require('../../db');

const validateAdd = async(req) => {
Expand Down Expand Up @@ -127,7 +129,8 @@ const encryptCredential = (obj) => {
custom_stt_url,
custom_tts_url,
auth_token = '',
cobalt_server_uri
cobalt_server_uri,
model_id
} = obj;

switch (vendor) {
Expand Down Expand Up @@ -202,6 +205,12 @@ const encryptCredential = (obj) => {
const cobaltData = JSON.stringify({cobalt_server_uri});
return encrypt(cobaltData);

case 'elevenlabs':
assert(api_key, 'invalid elevenLabs speech credential: api_key is required');
assert(model_id, 'invalid elevenLabs speech credential: model_id is required');
const elevenlabsData = JSON.stringify({api_key, model_id});
return encrypt(elevenlabsData);

default:
if (vendor.startsWith('custom:')) {
const customData = JSON.stringify({auth_token, custom_stt_url, custom_tts_url});
Expand Down Expand Up @@ -236,7 +245,7 @@ router.post('/', async(req, res) => {

// Check if vendor and label is already used for account or SP
if (label) {
const existingSpeech = await SpeechCredential.isAvailableVendorAndLabel(
const existingSpeech = await SpeechCredential.getSpeechCredentialsByVendorAndLabel(
service_provider_sid, account_sid, vendor, label);
if (existingSpeech.length > 0) {
throw new DbErrorUnprocessableRequest(`Label ${label} is already in use for another speech credential`);
Expand Down Expand Up @@ -317,6 +326,10 @@ function decryptCredential(obj, credential, logger) {
} else if ('soniox' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = obscureKey(o.api_key);
} else if ('elevenlabs' === obj.vendor) {
const o = JSON.parse(decrypt(credential));
obj.api_key = obscureKey(o.api_key);
obj.model_id = o.model_id;
} else if (obj.vendor.startsWith('custom:')) {
const o = JSON.parse(decrypt(credential));
obj.auth_token = obscureKey(o.auth_token);
Expand Down Expand Up @@ -454,7 +467,7 @@ router.put('/:sid', async(req, res) => {
custom_stt_endpoint_url,
custom_stt_url,
custom_tts_url,
cobalt_server_uri
cobalt_server_uri,
} = req.body;

const newCred = {
Expand Down Expand Up @@ -732,6 +745,18 @@ router.get('/:sid/test', async(req, res) => {
SpeechCredential.sttTestResult(sid, false);
}
}
} else if (cred.vendor === 'elevenlabs') {
const {api_key, model_id} = credential;
if (cred.use_for_tts) {
try {
await testElevenlabs(logger, {api_key, model_id});
results.tts.status = 'ok';
SpeechCredential.ttsTestResult(sid, true);
} catch (err) {
results.tts = {status: 'fail', reason: err.message};
SpeechCredential.ttsTestResult(sid, false);
}
}
}

res.status(200).json(results);
Expand All @@ -741,4 +766,81 @@ router.get('/:sid/test', async(req, res) => {
}
});

/**
* Fetch speech voices and languages
*/

router.post('/voices', async(req, res) => {
const logger = req.app.locals.logger;
const {vendor, label} = req.body;
const account_sid = req.user.account_sid || req.body.account_sid;
const service_provider_sid = req.user.service_provider_sid ||
req.body.service_provider_sid || parseServiceProviderSid(req);
try {
res.status(200).json(await getTtsVoices(vendor, label, service_provider_sid, account_sid));
} catch (err) {
sysError(logger, res, err);
}
});

router.post('/languages', async(req, res) => {
const logger = req.app.locals.logger;
const {vendor, label} = req.body;
const account_sid = req.user.account_sid || req.body.account_sid;
const service_provider_sid = req.user.service_provider_sid ||
req.body.service_provider_sid || parseServiceProviderSid(req);
try {
res.status(200).json(await getTtsLanguages(vendor, label, service_provider_sid, account_sid));
} catch (err) {
sysError(logger, res, err);
}
});

const getTtsVoices = async(vendor, label, service_provider_sid, account_sid) => {
const credentials = await SpeechCredential.isAvailableVendorAndLabel(
service_provider_sid, account_sid, vendor, label);
const cred = credentials && credentials.length > 0 ? credentials[0] : null;
if (vendor === 'elevenlabs') {
const get = bent('https://api.elevenlabs.io', 'GET', 'json', {
...(cred && {
'xi-api-key' : cred.api_key
})
});
const resp = await get('/v1/voices');
return resp ? resp.voices.map((v) => {
return {
value: v.voice_id,
name: `${v.name} - ${v.labels.accent}, ${v.labels.description},
${v.labels.age}, ${v.labels.gender}, ${v.labels['use case']}`
};
}) : [];
}
return [];
};

const getTtsLanguages = async(vendor, label, service_provider_sid, account_sid) => {
const credentials = await SpeechCredential.isAvailableVendorAndLabel(
service_provider_sid, account_sid, vendor, label);
const cred = credentials && credentials.length > 0 ? credentials[0] : null;
if (vendor === 'elevenlabs') {
if (!cred) {
return [];
}
const get = bent('https://api.elevenlabs.io', 'GET', 'json', {
'xi-api-key' : cred.api_key
});
const resp = await get('/v1/models');
if (!resp || resp.length === 0) {
return [];
}
const model = resp.find((m) => m.model_id === cred.model_id);
return model ? model.languages.map((l) => {
return {
value: l.language_id,
name: l.name
};
}) : [];
}
};

module.exports = router;
26 changes: 25 additions & 1 deletion lib/utils/speech-utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,29 @@ const testWellSaidTts = async(logger, credentials) => {
}
};

const testElevenlabs = async(logger, credentials) => {
const {api_key, model_id} = credentials;
try {
const post = bent('https://api.elevenlabs.io', 'POST', 'buffer', {
'xi-api-key': api_key,
'Accept': 'audio/mpeg',
'Content-Type': 'application/json'
});
const mp3 = await post('/v1/text-to-speech/21m00Tcm4TlvDq8ikWAM', {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think we should hardcode voice id 21m00Tcm4TlvDq8ikWAM, instead use GET /Voices and just take the first one. They remove a voice id in the future

text: 'Hello',
model_id,
voice_settings: {
stability: 0.5,
similarity_boost: 0.5
}
});
return mp3;
} catch (err) {
logger.info({err}, 'synthEvenlabs returned error');
throw err;
}
};

const testIbmTts = async(logger, getTtsVoices, credentials) => {
const {tts_api_key, tts_region} = credentials;
const voices = await getTtsVoices({vendor: 'ibm', credentials: {tts_api_key, tts_region}});
Expand Down Expand Up @@ -251,5 +274,6 @@ module.exports = {
testDeepgramStt,
testIbmTts,
testIbmStt,
testSonioxStt
testSonioxStt,
testElevenlabs
};
23 changes: 23 additions & 0 deletions test/speech-credentials.js
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,29 @@ test('speech credentials tests', async(t) => {
});
t.ok(result.statusCode === 204, 'successfully deleted speech credential for Cobalt');

/* add a credential for elevenlabs */
result = await request.post(`/Accounts/${account_sid}/SpeechCredentials`, {
resolveWithFullResponse: true,
auth: authUser,
json: true,
body: {
vendor: 'elevenlabs',
use_for_stt: true,
use_for_tts: false,
api_key: 'asdasdasdasddsadasda',
model_id: 'eleven_multilingual_v2'
}
});
t.ok(result.statusCode === 201, 'successfully added speech credential for Cobalt');
const elevenlabs_sid = result.body.sid;

/* delete the credential */
result = await request.delete(`/Accounts/${account_sid}/SpeechCredentials/${elevenlabs_sid}`, {
auth: authUser,
resolveWithFullResponse: true,
});
t.ok(result.statusCode === 204, 'successfully deleted speech credential for Cobalt');

await deleteObjectBySid(request, '/Accounts', account_sid);
await deleteObjectBySid(request, '/ServiceProviders', service_provider_sid);
t.end();
Expand Down