Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat TTS synthAudio from API #252

Merged
merged 11 commits into from
Nov 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion app.js
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,8 @@ const {
const {
getTtsVoices,
getTtsSize,
purgeTtsCache
purgeTtsCache,
synthAudio
} = require('@jambonz/speech-utils')(JAMBONES_REDIS_SENTINELS || {
host: process.env.JAMBONES_REDIS_HOST,
port: process.env.JAMBONES_REDIS_PORT || 6379
Expand Down Expand Up @@ -98,6 +99,7 @@ app.locals = {
getTtsVoices,
getTtsSize,
purgeTtsCache,
synthAudio,
lookupAppBySid,
lookupAccountBySid,
lookupAccountByPhoneNumber,
Expand Down
1 change: 1 addition & 0 deletions lib/routes/api/accounts.js
Original file line number Diff line number Diff line change
Expand Up @@ -1038,4 +1038,5 @@ router.get('/:sid/Queues', async(req, res) => {
}
});


module.exports = router;
76 changes: 2 additions & 74 deletions lib/routes/api/speech-credentials.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@ const assert = require('assert');
const Account = require('../../models/account');
const SpeechCredential = require('../../models/speech-credential');
const sysError = require('../error');
const {decrypt, encrypt, obscureKey} = require('../../utils/encrypt-decrypt');
const {decrypt, encrypt} = require('../../utils/encrypt-decrypt');
const {parseAccountSid, parseServiceProviderSid, parseSpeechCredentialSid} = require('./utils');
const {decryptCredential} = require('../../utils/speech-utils');
const {DbErrorUnprocessableRequest, DbErrorForbidden} = require('../../utils/errors');
const {
testGoogleTts,
Expand Down Expand Up @@ -274,79 +275,6 @@ router.post('/', async(req, res) => {
}
});

/**
 * Decrypt a stored speech credential and copy its vendor-specific fields
 * onto `obj`, obscuring every secret value so the result is safe to return
 * from the API.
 *
 * @param {object} obj - credential row (without the encrypted blob); mutated in place.
 *   `obj.vendor` selects which fields are copied.
 * @param {string} credential - encrypted JSON blob as accepted by `decrypt`
 * @param {object} logger - logger exposing `info(obj, msg)`
 * @throws {TypeError} if `obj.vendor` is not a string and matches no known vendor
 */
function decryptCredential(obj, credential, logger) {
  const {vendor} = obj;
  /* decrypt lazily so unknown vendors never touch the encrypted blob */
  const parse = () => JSON.parse(decrypt(credential));

  switch (vendor) {
    case 'google': {
      const o = parse();
      const key_header = '-----BEGIN PRIVATE KEY-----\n';
      const obscured = {
        ...o,
        private_key: `${key_header}${obscureKey(o.private_key.slice(key_header.length, o.private_key.length))}`
      };
      obj.service_key = JSON.stringify(obscured);
      break;
    }
    case 'aws': {
      const o = parse();
      obj.access_key_id = o.access_key_id;
      obj.secret_access_key = obscureKey(o.secret_access_key);
      obj.aws_region = o.aws_region;
      /* log only the obscured view: the decrypted payload must never reach the logs */
      logger.info({obj}, 'retrieving aws speech credential');
      break;
    }
    case 'microsoft': {
      const o = parse();
      obj.api_key = obscureKey(o.api_key);
      obj.region = o.region;
      obj.use_custom_tts = o.use_custom_tts;
      obj.custom_tts_endpoint = o.custom_tts_endpoint;
      obj.custom_tts_endpoint_url = o.custom_tts_endpoint_url;
      obj.use_custom_stt = o.use_custom_stt;
      obj.custom_stt_endpoint = o.custom_stt_endpoint;
      obj.custom_stt_endpoint_url = o.custom_stt_endpoint_url;
      /* log only the obscured view: the decrypted payload must never reach the logs */
      logger.info({obj}, 'retrieving azure speech credential');
      break;
    }
    case 'wellsaid':
    case 'deepgram':
    case 'soniox':
    case 'assemblyai': {
      const o = parse();
      obj.api_key = obscureKey(o.api_key);
      break;
    }
    case 'nuance': {
      const o = parse();
      obj.client_id = o.client_id;
      obj.secret = o.secret ? obscureKey(o.secret) : null;
      break;
    }
    case 'ibm': {
      const o = parse();
      obj.tts_api_key = obscureKey(o.tts_api_key);
      obj.tts_region = o.tts_region;
      obj.stt_api_key = obscureKey(o.stt_api_key);
      obj.stt_region = o.stt_region;
      obj.instance_id = o.instance_id;
      break;
    }
    case 'nvidia': {
      const o = parse();
      obj.riva_server_uri = o.riva_server_uri;
      break;
    }
    case 'cobalt': {
      const o = parse();
      obj.cobalt_server_uri = o.cobalt_server_uri;
      break;
    }
    case 'elevenlabs': {
      const o = parse();
      obj.api_key = obscureKey(o.api_key);
      obj.model_id = o.model_id;
      break;
    }
    default:
      if (vendor.startsWith('custom:')) {
        const o = parse();
        obj.auth_token = obscureKey(o.auth_token);
        obj.custom_stt_url = o.custom_stt_url;
        obj.custom_tts_url = o.custom_tts_url;
      }
  }
}

/**
* retrieve all speech credentials for an account
*/
Expand Down
82 changes: 82 additions & 0 deletions lib/routes/api/tts-cache.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,13 @@ const router = require('express').Router();
const {
parseAccountSid
} = require('./utils');
const SpeechCredential = require('../../models/speech-credential');
const fs = require('fs');
const { v4: uuidv4 } = require('uuid');
const {DbErrorBadRequest} = require('../../utils/errors');
const Account = require('../../models/account');
const sysError = require('../error');
const { getSpeechCredential, decryptCredential } = require('../../utils/speech-utils');

router.delete('/', async(req, res) => {
const {purgeTtsCache} = req.app.locals;
Expand All @@ -26,4 +33,79 @@ router.get('/', async(req, res) => {
res.status(200).json({size});
});

/**
 * POST /Synthesize
 *
 * Synthesizes `text` to audio with the speech credential identified by
 * `speech_credential_sid` and streams the resulting file back as `audio/mpeg`.
 * The file returned by `synthAudio` is deleted once the read stream closes,
 * whether it finished normally, failed, or the client went away.
 *
 * Required body fields: speech_credential_sid, text, language, voice.
 * Optional body field: engine (defaults to 'standard').
 * Validation and lookup failures are reported through `sysError`.
 */
router.post('/Synthesize', async(req, res) => {
  const {logger, synthAudio} = req.app.locals;
  try {
    const accountSid = parseAccountSid(req);
    const body = req.body;
    if (!body.speech_credential_sid || !body.text || !body.language || !body.voice) {
      throw new DbErrorBadRequest('speech_credential_sid, text, language, voice are all required');
    }

    const result = await Account.retrieve(accountSid);
    if (!result || result.length === 0 || !result[0].is_active) {
      throw new DbErrorBadRequest(`Account not found for sid ${accountSid}`);
    }
    // NOTE(review): the credential is looked up by sid only; confirm that ownership
    // by this account (or its service provider) is enforced elsewhere.
    const credentials = await SpeechCredential.retrieve(body.speech_credential_sid);
    if (!credentials || credentials.length === 0) {
      throw new
      DbErrorBadRequest(`There is no available speech credential for ${body.speech_credential_sid}`);
    }
    const {credential, ...obj} = credentials[0];

    /* the vendor SDK needs the real secrets, so do not obscure them here */
    decryptCredential(obj, credential, logger, false);
    const cred = getSpeechCredential(obj, logger);

    const { text, language, engine = 'standard' } = body;
    const salt = uuidv4();
    /* parse Nuance voices into name and model */
    let voice = body.voice;
    let model;
    if (cred.vendor === 'nuance' && voice) {
      const arr = /([A-Za-z-]*)\s+-\s+(enhanced|standard)/.exec(voice);
      if (arr) {
        voice = arr[1];
        model = arr[2];
      }
    }
    /* synthAudio expects a stats collector; metrics are not recorded for this endpoint */
    const stats = {
      histogram: () => {},
      increment: () => {},
    };
    const { filePath } = await synthAudio(stats, {
      account_sid: accountSid,
      text,
      vendor: cred.vendor,
      language,
      voice,
      engine,
      model,
      salt,
      credentials: cred,
      disableTtsCache: false
    });

    /* best-effort cleanup: a failed delete is logged, never thrown from the callback,
       because an exception there would be uncaught and could take the process down */
    const removeFile = () => {
      fs.unlink(filePath, (err) => {
        if (err) {
          logger.error({err}, `failed to delete ${filePath}`);
          return;
        }
        logger.info(`${filePath} was deleted`);
      });
    };

    let stat;
    try {
      stat = await fs.promises.stat(filePath);
    } catch (err) {
      removeFile();
      throw err;
    }

    res.writeHead(200, {
      'Content-Type': 'audio/mpeg',
      'Content-Length': stat.size,
    });

    const readStream = fs.createReadStream(filePath);
    readStream.on('error', (err) => {
      /* headers are already sent, so all we can do is abort the response */
      logger.error({err}, `error streaming ${filePath}`);
      res.destroy(err);
    });
    /* 'close' fires after 'end' as well as after an error or destroy() */
    readStream.on('close', removeFile);
    /* pipe() does not tear down the source when the client disconnects early */
    res.on('close', () => readStream.destroy());
    readStream.pipe(res);

  } catch (err) {
    sysError(logger, res, err);
  }
});

module.exports = router;
59 changes: 59 additions & 0 deletions lib/swagger/swagger.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4209,6 +4209,65 @@ paths:
application/json:
schema:
$ref: '#/components/schemas/RegisteredClient'
/Accounts/{AccountSid}/TtsCache/Synthesize:
post:
tags:
- Accounts
summary: get TTS from provider
operationId: Synthesize
requestBody:
content:
application/json:
schema:
type: object
properties:
speech_credential_sid:
type: string
description: Speech credential Sid
example: 553b4b6b-8918-4394-a46d-1e3c5a3c717b
text:
type: string
description: the text to convert to audio
example: Hello How are you
language:
type: string
description: language of the text to synthesize
example: en-US
voice:
type: string
description: voice ID
example: en-US-Standard-C
required:
- speech_credential_sid
- text
- language
- voice
responses:
200:
description: Audio is created
content:
audio/mpeg:
schema:
type: string
format: binary
400:
description: bad request
content:
application/json:
schema:
$ref: '#/components/schemas/GeneralError'
422:
description: unprocessable entity
content:
application/json:
schema:
$ref: '#/components/schemas/GeneralError'
500:
description: system error
content:
application/json:
schema:
$ref: '#/components/schemas/GeneralError'
/Lcrs:
post:
tags:
Expand Down
106 changes: 105 additions & 1 deletion lib/utils/speech-utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ const { SpeechClient } = require('@soniox/soniox-node');
const bent = require('bent');
const fs = require('fs');
const { AssemblyAI } = require('assemblyai');
const {decrypt, obscureKey} = require('./encrypt-decrypt');


const testSonioxStt = async(logger, credentials) => {
Expand Down Expand Up @@ -287,6 +288,107 @@ const testAssemblyStt = async(logger, credentials) => {
});
};

/**
 * Shape a decrypted speech credential for use by the synthesis layer.
 *
 * - google: parses `service_key` (raw newlines are escaped first) and returns a
 *   copy carrying the parsed object as `credentials`; when parsing fails the
 *   problem is logged and the credential is returned unchanged.
 * - aws / polly: returns a copy with `accessKeyId`, `secretAccessKey` and
 *   `region` added (region falls back to 'us-east-1').
 * - any other vendor: returns the credential unchanged.
 *
 * @param {object} credential - decrypted credential; must carry `vendor`
 * @param {object} logger - logger exposing `info`
 * @returns {object} the credential itself or an augmented copy
 */
const getSpeechCredential = (credential, logger) => {
  const labelPart = credential.label ? `, label: ${credential.label}` : '';
  logger.info(
    `Speech vendor: ${credential.vendor} ${labelPart} selected`);

  switch (credential.vendor) {
    case 'google': {
      let parsedKey;
      try {
        /* escape raw newlines so the key survives JSON.parse */
        parsedKey = JSON.parse(credential.service_key.replace(/\n/g, '\\n'));
      } catch (err) {
        logger.info({err}, `malformed google service_key provisioned for account ${credential.speech_credential_sid}`);
        return credential;
      }
      return {...credential, credentials: parsedKey};
    }
    case 'aws':
    case 'polly': {
      const region = credential.aws_region || 'us-east-1';
      return {
        ...credential,
        accessKeyId: credential.access_key_id,
        secretAccessKey: credential.secret_access_key,
        region
      };
    }
    default:
      return credential;
  }
};

/**
 * Decrypt a stored speech credential and copy its vendor-specific fields
 * onto `obj`.
 *
 * @param {object} obj - credential row (without the encrypted blob); mutated in place.
 *   `obj.vendor` selects which fields are copied.
 * @param {string} credential - encrypted JSON blob as accepted by `decrypt`
 * @param {object} logger - logger exposing `info(obj, msg)`
 * @param {boolean} [isObscureKey=true] - when true, secret values are passed through
 *   `obscureKey` (suitable for API responses); when false, the real secrets are
 *   copied so the credential can be used to call the vendor.
 * @throws {TypeError} if `obj.vendor` is not a string and matches no known vendor
 */
function decryptCredential(obj, credential, logger, isObscureKey = true) {
  const {vendor} = obj;
  /* decrypt lazily so unknown vendors never touch the encrypted blob */
  const parse = () => JSON.parse(decrypt(credential));
  const mask = (secret) => (isObscureKey ? obscureKey(secret) : secret);
  /* Never log the decrypted payload, nor `obj` itself: with isObscureKey=false
     `obj` carries the plaintext secret too. Identify the credential only. */
  const logRetrieval = (msg) => logger.info(
    {vendor, speech_credential_sid: obj.speech_credential_sid}, msg);

  switch (vendor) {
    case 'google': {
      const o = parse();
      const key_header = '-----BEGIN PRIVATE KEY-----\n';
      /* When not obscuring, keep the key exactly as stored: slicing off a fixed
         header length and re-adding it would corrupt keys with a different header. */
      const private_key = isObscureKey ?
        `${key_header}${obscureKey(o.private_key.slice(key_header.length, o.private_key.length))}` :
        o.private_key;
      obj.service_key = JSON.stringify({...o, private_key});
      break;
    }
    case 'aws': {
      const o = parse();
      obj.access_key_id = o.access_key_id;
      obj.secret_access_key = mask(o.secret_access_key);
      obj.aws_region = o.aws_region;
      logRetrieval('retrieving aws speech credential');
      break;
    }
    case 'microsoft': {
      const o = parse();
      obj.api_key = mask(o.api_key);
      obj.region = o.region;
      obj.use_custom_tts = o.use_custom_tts;
      obj.custom_tts_endpoint = o.custom_tts_endpoint;
      obj.custom_tts_endpoint_url = o.custom_tts_endpoint_url;
      obj.use_custom_stt = o.use_custom_stt;
      obj.custom_stt_endpoint = o.custom_stt_endpoint;
      obj.custom_stt_endpoint_url = o.custom_stt_endpoint_url;
      logRetrieval('retrieving azure speech credential');
      break;
    }
    case 'wellsaid':
    case 'deepgram':
    case 'soniox':
    case 'assemblyai': {
      const o = parse();
      obj.api_key = mask(o.api_key);
      break;
    }
    case 'nuance': {
      const o = parse();
      obj.client_id = o.client_id;
      obj.secret = o.secret ? mask(o.secret) : null;
      break;
    }
    case 'ibm': {
      const o = parse();
      obj.tts_api_key = mask(o.tts_api_key);
      obj.tts_region = o.tts_region;
      obj.stt_api_key = mask(o.stt_api_key);
      obj.stt_region = o.stt_region;
      obj.instance_id = o.instance_id;
      break;
    }
    case 'nvidia': {
      const o = parse();
      obj.riva_server_uri = o.riva_server_uri;
      break;
    }
    case 'cobalt': {
      const o = parse();
      obj.cobalt_server_uri = o.cobalt_server_uri;
      break;
    }
    case 'elevenlabs': {
      const o = parse();
      obj.api_key = mask(o.api_key);
      obj.model_id = o.model_id;
      break;
    }
    default:
      if (vendor.startsWith('custom:')) {
        const o = parse();
        obj.auth_token = mask(o.auth_token);
        obj.custom_stt_url = o.custom_stt_url;
        obj.custom_tts_url = o.custom_tts_url;
      }
  }
}

module.exports = {
testGoogleTts,
testGoogleStt,
Expand All @@ -303,5 +405,7 @@ module.exports = {
testIbmStt,
testSonioxStt,
testElevenlabs,
testAssemblyStt
testAssemblyStt,
getSpeechCredential,
decryptCredential
};
Loading