From 3418abfa689210b4e6f30031674a08edbfc4d1ab Mon Sep 17 00:00:00 2001 From: moon Date: Mon, 29 Jul 2024 10:11:47 -0700 Subject: [PATCH 1/3] Update sqlite_vss for linux --- README.md | 28 ++++++++-------------------- src/core/sqlite_vss.ts | 3 ++- 2 files changed, 10 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index a541d0e3c86..82b5ca6df77 100644 --- a/README.md +++ b/README.md @@ -2,25 +2,13 @@ A conversational agent embodiment of Ruby, powered by [bgent](https://github.com/JoinTheAlliance/bgent), with elevenlabs and OpenAI Whisper for text-to-speech and speech-to-text services. -## Configuration -- Get API keys for Discord and Eleven Labs -- Set the appropriate environment variables - -For help with setting up your Discord Bot, check out here: https://discordjs.guide/preparations/setting-up-a-bot-application.html +## Linux Installation +``` +apt-get install build-essential libatlas-base-dev +``` -## Environment Variables - -### General +## Configuration +- Get API keys for Discord and Twitter +- Copy .env.example to .env and fill in the appropriate values -| Variable Name | Values | Defaults | Description | -|---------------------------------------|-----------------------|--------------------------|----------------------------------------------------------------------------------| -| DISCORD_API_TOKEN | string | '' | Discord bot API token. | -| ELEVENLABS_XI_API_KEY | string | '' | API key for Eleven Labs. | -| ELEVENLABS_MODEL_ID | string | 'eleven_multilingual_v2' | Model ID for a specific language model in Eleven Labs. | -| ELEVENLABS_VOICE_ID | string | '21m00Tcm4TlvDq8ikWAM' | Voice ID for a specific voice in Eleven Labs. Default is "Rachel". | -| ELEVENLABS_VOICE_STABILITY | number | 0.5 | Stability parameter for Eleven Labs voice synthesis. | -| ELEVENLABS_VOICE_SIMILARITY_BOOST | number | 0.9 | Similarity boost for Eleven Labs voice synthesis. 
| -| ELEVENLABS_VOICE_STYLE | number | 0.66 | Style parameter for Eleven Labs voice synthesis. | -| ELEVENLABS_VOICE_USE_SPEAKER_BOOST | boolean | false | Whether to use speaker boost in Eleven Labs voice synthesis. | -| ELEVENLABS_OPTIMIZE_STREAMING_LATENCY | 0 \| 1 \| 2 \| 3 \| 4 | 4 | Level of optimization for streaming latency in Eleven Labs. | -| ELEVENLABS_OUTPUT_FORMAT | 'pcm_16000' | 'pcm_16000' | Output format for voice synthesis. Currently only pcm_16000 will work properly. | +For help with setting up your Discord Bot, check out here: https://discordjs.guide/preparations/setting-up-a-bot-application.html \ No newline at end of file diff --git a/src/core/sqlite_vss.ts b/src/core/sqlite_vss.ts index b8b7af06d52..cecf3cbdfaa 100644 --- a/src/core/sqlite_vss.ts +++ b/src/core/sqlite_vss.ts @@ -24,7 +24,6 @@ function validPlatform(platform: string, arch: string): boolean { function extensionSuffix(platform: string): string { if (platform === "win32") return "dll"; if (platform === "darwin") return "dylib"; - if (platform === "linux") return ""; return "so"; } @@ -72,6 +71,8 @@ export function getVssLoadablePath(): string { return loadablePathResolver("vss0"); } +// Note: There is a weird bug on linux where the extension is not loaded correctly, so we remove the .so extension + export function loadVector(db: Database): void { db.loadExtension(getVectorLoadablePath().replace('.so', '')); } From 2c6cced5d983942fb6a60be09387e0029f6848a2 Mon Sep 17 00:00:00 2001 From: moon Date: Mon, 29 Jul 2024 10:49:10 -0700 Subject: [PATCH 2/3] Update llama to better model --- src/services/llama.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/services/llama.ts b/src/services/llama.ts index 05a90630729..cf8323d54ec 100644 --- a/src/services/llama.ts +++ b/src/services/llama.ts @@ -24,7 +24,7 @@ class LlamaService { constructor() { this.llama = undefined; this.model = undefined; - this.modelUrl = 
"https://cdn-lfs-us-1.huggingface.co/repos/1a/35/1a356434d698e78d7edc87f5552d7ac0141a16aa99fb4b8467bc59b201ce53a8/4fd4066c43347d388c43abdf8a27ea093b83932b10c741574e10a67c6d48e0b0?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27ggml-model-Q4_K_M.gguf%3B+filename%3D%22ggml-model-Q4_K_M.gguf%22%3B&Expires=1722523448&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyMjUyMzQ0OH19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zLzFhLzM1LzFhMzU2NDM0ZDY5OGU3OGQ3ZWRjODdmNTU1MmQ3YWMwMTQxYTE2YWE5OWZiNGI4NDY3YmM1OWIyMDFjZTUzYTgvNGZkNDA2NmM0MzM0N2QzODhjNDNhYmRmOGEyN2VhMDkzYjgzOTMyYjEwYzc0MTU3NGUxMGE2N2M2ZDQ4ZTBiMD9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=UzDhO69kBDC1pYhb0tFbbvkyZMYaPE7xFKQImjShmqazSEUQRIXihp3KGr5xMXN8kx11oenhoCMqcc-pzRIyae9K8cQPjb5-M3eW2zHUXUi3CAXe8G6tNnpH6W6BkJuL6-l%7E-kgpTr44NUA4p8FpABTKWccUmWCeuN5SA3mKOCyMdTdDBgBtDqQ0UEkiVjmYFVwZX6ZTW3AtasPqrfS80Q1z0q-5uEkkdsHi1KVycWpv%7ENCmrHEBH1VXxGmjBa9hVWJS9yY95lq9yyrbdtH%7EWv5TDAFDLrYYqHXx-7x-kB7zbd18T5Aui1kqNvJcxZTnhxW7r3HlrFKAfh5t7oSvOA__&Key-Pair-Id=K24J24Z295AEI9"; + this.modelUrl = 
"https://cdn-lfs-us-1.huggingface.co/repos/77/fa/77fa6eda454ebafe29b05a62c2de140b074bb8beb90cd81a3d5528fa0db92e2e/5880a37f0fd38b083c5dca14aaf697e24b4c9da1cb2f27bde5bbdef35d7a6b17?response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27llama3-8B-DarkIdol-1.0-Q4_K_S-imat.gguf%3B+filename%3D%22llama3-8B-DarkIdol-1.0-Q4_K_S-imat.gguf%22%3B&Expires=1722533805&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTcyMjUzMzgwNX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmh1Z2dpbmdmYWNlLmNvL3JlcG9zLzc3L2ZhLzc3ZmE2ZWRhNDU0ZWJhZmUyOWIwNWE2MmMyZGUxNDBiMDc0YmI4YmViOTBjZDgxYTNkNTUyOGZhMGRiOTJlMmUvNTg4MGEzN2YwZmQzOGIwODNjNWRjYTE0YWFmNjk3ZTI0YjRjOWRhMWNiMmYyN2JkZTViYmRlZjM1ZDdhNmIxNz9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=YQGVqCSy3Yc0rJDvC6Rs5yIo24772JSlaDh8hjrkKyGNc1OhJr6YsdAf6zjmHcJy0GE2rXqpY4Zmv5ycpwtcE8rCVYmsp7-YhcIq2Ivd-sBXQ-p2fGlrGveN9WcaRqd%7E4%7Eo4YVPSUF0TLIbKNn2jRNOkikW9jaPKHewl0fa-o5Elu-J%7EsbIR9lJFL3PnRRjkCHwVkLMO03wRcrSssTInFhXQzPc5lVrzqVvNkst-WrGig5A8H1zEq85VgeyDnQpPsjXkae%7E4ADa13VHEd0fhEDplYkf2CF-lzztzd7y3UPfoy6WmqX407mnh%7Ep%7E4AaA1YXkYrffkEWveT%7Ewh%7E3EUng__&Key-Pair-Id=K24J24Z295AEI9"; const modelName = "model.gguf"; console.log("modelName", modelName) this.modelPath = path.join(__dirname, modelName); From 946d2179d3beee04d862ebcd6188d8155e7c5788 Mon Sep 17 00:00:00 2001 From: moon Date: Mon, 29 Jul 2024 12:01:21 -0700 Subject: [PATCH 3/3] update speech synth --- src/clients/discord/index.ts | 25 +++++++++++++++++++++++-- src/index.ts | 2 +- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/src/clients/discord/index.ts b/src/clients/discord/index.ts index d5f406fe91a..e28cba03fac 100644 --- a/src/clients/discord/index.ts +++ b/src/clients/discord/index.ts @@ -38,10 +38,11 @@ import { default as getUuid, default as uuid } from "uuid-by-string"; import { Agent } from '../../core/agent.ts'; import { adapter } from "../../core/db.ts"; import settings from "../../core/settings.ts"; -import 
{ textToSpeech } from "../elevenlabs/index.ts"; import { AudioMonitor } from "./audioMonitor.ts"; import { commands } from "./commands.ts"; import { InterestChannels, ResponseType } from "./types.ts"; +import { SpeechSynthesizer } from "../../services/speechSynthesis.ts"; +import WavEncoder from "wav-encoder"; export const messageHandlerTemplate = // `{{actionExamples}} @@ -100,6 +101,7 @@ export class DiscordClient extends EventEmitter { private agent: Agent; private bio: string; private transcriber: any; + speechSynthesizer: SpeechSynthesizer; constructor(agent: Agent, bio: string) { super(); @@ -256,6 +258,25 @@ export class DiscordClient extends EventEmitter { } } + async textToSpeech(text: string): Promise { + if(!this.speechSynthesizer) { + this.speechSynthesizer = await SpeechSynthesizer.create("./model.onnx"); + } + + console.log("Synthesizing speech..."); + // Synthesize the caller-supplied text (not a fixed sample phrase) to get a Float32Array of single channel 22050Hz audio data + const audio = await this.speechSynthesizer.synthesize(text); + console.log("Speech synthesized"); + // Encode the audio data into a WAV format + const { encode } = WavEncoder; + const audioData = { + sampleRate: 22050, + channelData: [audio] + }; + const wavArrayBuffer = encode.sync(audioData); + return wavArrayBuffer; + } + async speechToText(audioBuffer: Buffer) { if (!this.transcriber) { console.log("Transcriber not initialized. 
Initializing now..."); @@ -692,7 +713,7 @@ export class DiscordClient extends EventEmitter { if (requestedResponseType == ResponseType.RESPONSE_TEXT) { return Readable.from(content); } else { - return await textToSpeech(content); + return await this.textToSpeech(content); } } diff --git a/src/index.ts b/src/index.ts index ecad9936e3a..874779f4713 100644 --- a/src/index.ts +++ b/src/index.ts @@ -8,7 +8,7 @@ import { Agent } from './core/agent.ts'; import fs from "fs"; import yargs from "yargs"; import { SpeechSynthesizer } from "./services/speechSynthesis.ts"; -import WavEncoderPkg from "wav-encoder"; +import WavEncoder from "wav-encoder"; interface Arguments { character?: string; twitter?: boolean;