Skip to content

Commit

Permalink
update speech synth
Browse files Browse the repository at this point in the history
  • Loading branch information
lalalune committed Jul 29, 2024
1 parent 2c6cced commit 946d217
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 3 deletions.
25 changes: 23 additions & 2 deletions src/clients/discord/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,11 @@ import { default as getUuid, default as uuid } from "uuid-by-string";
import { Agent } from '../../core/agent.ts';
import { adapter } from "../../core/db.ts";
import settings from "../../core/settings.ts";
import { textToSpeech } from "../elevenlabs/index.ts";
import { AudioMonitor } from "./audioMonitor.ts";
import { commands } from "./commands.ts";
import { InterestChannels, ResponseType } from "./types.ts";
import { SpeechSynthesizer } from "../../services/speechSynthesis.ts";
import WavEncoder from "wav-encoder";

export const messageHandlerTemplate =
// `{{actionExamples}}
Expand Down Expand Up @@ -100,6 +101,7 @@ export class DiscordClient extends EventEmitter {
private agent: Agent;
private bio: string;
private transcriber: any;
speechSynthesizer: SpeechSynthesizer;

constructor(agent: Agent, bio: string) {
super();
Expand Down Expand Up @@ -256,6 +258,25 @@ export class DiscordClient extends EventEmitter {
}
}

/**
 * Synthesizes the given text into speech and returns it as a WAV audio stream.
 *
 * The speech synthesizer is created lazily on first call (from "./model.onnx")
 * and cached on `this.speechSynthesizer` for subsequent calls.
 *
 * @param text - The text to be spoken.
 * @returns A Readable stream carrying the WAV-encoded audio bytes.
 */
async textToSpeech(text: string): Promise<Readable> {
  if (!this.speechSynthesizer) {
    this.speechSynthesizer = await SpeechSynthesizer.create("./model.onnx");
  }

  console.log("Synthesizing speech...");
  // Synthesize the caller-supplied text (not a fixed placeholder sentence).
  // Per the original note, the result is a Float32Array of single-channel
  // 22050Hz audio data — NOTE(review): confirm against SpeechSynthesizer.
  const audio = await this.speechSynthesizer.synthesize(text);
  console.log("Speech synthesized");

  // Encode the raw samples into WAV format; the sample rate must match
  // what the synthesizer produces.
  const wavArrayBuffer = WavEncoder.encode.sync({
    sampleRate: 22050,
    channelData: [audio],
  });

  // The declared return type is a Readable, so wrap the encoded bytes in a
  // stream rather than handing back the bare ArrayBuffer.
  return Readable.from(Buffer.from(wavArrayBuffer));
}

async speechToText(audioBuffer: Buffer) {
if (!this.transcriber) {
console.log("Transcriber not initialized. Initializing now...");
Expand Down Expand Up @@ -692,7 +713,7 @@ export class DiscordClient extends EventEmitter {
if (requestedResponseType == ResponseType.RESPONSE_TEXT) {
return Readable.from(content);
} else {
return await textToSpeech(content);
return await this.textToSpeech(content);
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import { Agent } from './core/agent.ts';
import fs from "fs";
import yargs from "yargs";
import { SpeechSynthesizer } from "./services/speechSynthesis.ts";
import WavEncoderPkg from "wav-encoder";
import WavEncoder from "wav-encoder";
interface Arguments {
character?: string;
twitter?: boolean;
Expand Down

0 comments on commit 946d217

Please sign in to comment.