Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Integrate Moondream into Twitter and Discord #16

Merged
merged 16 commits into from
Jul 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 26 additions & 0 deletions src/clients/discord/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ import settings from "../../core/settings.ts";
import { AudioMonitor } from "./audioMonitor.ts";
import { commands } from "./commands.ts";
import { InterestChannels, ResponseType } from "./types.ts";
import ImageRecognitionService from "../../services/imageRecognition.ts"
import { extractAnswer } from "../../core/util.ts";
import { SpeechSynthesizer } from "../../services/speechSynthesis.ts";
import WavEncoder from "wav-encoder";

Expand Down Expand Up @@ -101,6 +103,7 @@ export class DiscordClient extends EventEmitter {
private agent: Agent;
private bio: string;
private transcriber: any;
private imageRecognitionService: ImageRecognitionService;
speechSynthesizer: SpeechSynthesizer;

constructor(agent: Agent, bio: string) {
Expand All @@ -124,6 +127,9 @@ export class DiscordClient extends EventEmitter {

this.initializeTranscriber();

this.imageRecognitionService = new ImageRecognitionService();
this.imageRecognitionService.initialize();

this.client.once(Events.ClientReady, async (readyClient: { user: { tag: any; id: any } }) => {
console.log(`Logged in as ${readyClient.user?.tag}`);
console.log("Use this URL to add the bot to your server:");
Expand Down Expand Up @@ -210,6 +216,12 @@ export class DiscordClient extends EventEmitter {
const user_id = message.author.id as UUID;
const userName = message.author.username;
const channelId = message.channel.id;

// Check for image attachments
if (message.attachments.size > 0) {
await this.handleImageRecognition(message);
}

const textContent = message.content;

try {
Expand Down Expand Up @@ -316,6 +328,20 @@ export class DiscordClient extends EventEmitter {
}
}

/**
 * Runs image recognition on the first attachment of a Discord message and
 * appends the resulting description to the message's text content.
 *
 * Only the first attachment is inspected, and only when its content type
 * starts with `image/`; otherwise the message is left untouched.
 * If recognition or answer extraction throws, the error is logged and an
 * apology is sent as a reply to the message.
 *
 * @param message - The incoming Discord message; its `content` is mutated
 *   in place when a description is produced.
 */
private async handleImageRecognition(message: DiscordMessage) {
  const firstAttachment = message.attachments.first();

  // Nothing to do unless the first attachment exists and is an image.
  if (!firstAttachment || !firstAttachment.contentType?.startsWith('image/')) {
    return;
  }

  try {
    const rawResults = await this.imageRecognitionService.recognizeImage(firstAttachment.url);
    // Only the first recognition result is used.
    const description = extractAnswer(rawResults[0]);
    // Attach the description to the message text so later processing sees it.
    message.content = message.content + `\nImage description: ${description}`;
  } catch (error) {
    console.error('Error recognizing image:', error);
    await message.reply('Sorry, I encountered an error while processing the image.');
  }
}

private async ensureUserExists(agentId: UUID, userName: string, botToken: string | null = null) {
if (!userName && botToken) {
Expand Down
7 changes: 5 additions & 2 deletions src/clients/twitter/base.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ import settings from "../../core/settings.ts";

import { fileURLToPath } from 'url';
import ImageRecognitionService from "../../services/imageRecognition.ts";
import { extractAnswer } from "../../core/util.ts";

const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
Expand Down Expand Up @@ -123,8 +124,10 @@ export class ClientBase extends EventEmitter {

async describeImage(imageUrl: string): Promise<string> {
try {
const description = await this.imageRecognitionService.recognizeImage(imageUrl);
return description[0] || 'Unable to describe the image.';
const recognizedText = await this.imageRecognitionService.recognizeImage(imageUrl);
const description = extractAnswer(recognizedText[0]);

return description || 'Unable to describe the image.';
} catch (error) {
console.error('Error describing image:', error);
return 'Error occurred while describing the image.';
Expand Down
9 changes: 8 additions & 1 deletion src/core/util.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,4 +41,11 @@ export function prependWavHeader(readable: Readable, audioLength: number, sample
passThrough.end();
});
return passThrough;
}
}


/**
 * Extracts the answer portion from a raw recognition result string.
 *
 * The answer is the text located after the first `'Answer: '` marker and
 * before the next `'<|endoftext|>'` marker that follows it.
 *
 * Missing markers are tolerated instead of producing a mangled slice:
 * - if `'Answer: '` is absent, extraction starts at the beginning of `text`;
 * - if `'<|endoftext|>'` is absent after the start, extraction runs to the
 *   end of `text`.
 *
 * @param text - Raw text to extract the answer from.
 * @returns The extracted answer; an empty string when `text` is empty.
 */
export function extractAnswer(text: string): string {
  const answerMarker = 'Answer: ';
  const endMarker = '<|endoftext|>';

  // indexOf returns -1 when the marker is missing; fall back to the start of
  // the text rather than blindly adding the marker length to -1.
  const markerIndex = text.indexOf(answerMarker);
  const startIndex = markerIndex === -1 ? 0 : markerIndex + answerMarker.length;

  // Search for the terminator from the answer start (not a fixed offset) so
  // very short answers are still terminated correctly.
  const endMarkerIndex = text.indexOf(endMarker, startIndex);
  // A -1 end index would make slice() drop the last character; use the full
  // length when no terminator is present.
  const endIndex = endMarkerIndex === -1 ? text.length : endMarkerIndex;

  return text.slice(startIndex, endIndex);
}
3 changes: 2 additions & 1 deletion src/services/imageRecognition.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ class ImageRecognitionService {

constructor() {
this.modelId = 'Xenova/moondream2';
this.device = 'webgpu';
// this.device = 'webgpu';
this.device = 'cpu';
this.model = null;
this.processor = null;
this.tokenizer = null;
Expand Down