diff --git a/.gitignore b/.gitignore
index b34c0f72..5d6bcb66 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,3 +11,5 @@ server/manifest.go
 
 # VSCode
 .vscode/
+.aider*
+.env
diff --git a/server/ai/anthropic/anthropic.go b/server/ai/anthropic/anthropic.go
index b8ad60cc..12c15f6c 100644
--- a/server/ai/anthropic/anthropic.go
+++ b/server/ai/anthropic/anthropic.go
@@ -1,7 +1,9 @@
 package anthropic
 
 import (
+	"encoding/base64"
 	"fmt"
+	"io"
 	"net/http"
 
 	"github.com/mattermost/mattermost-plugin-ai/server/ai"
@@ -28,46 +30,100 @@ func New(llmService ai.ServiceConfig, httpClient *http.Client, metricsService me
 	}
 }
 
+// isValidImageType checks if the MIME type is supported by the Anthropic API
+func isValidImageType(mimeType string) bool {
+	validTypes := map[string]bool{
+		"image/jpeg": true,
+		"image/png":  true,
+		"image/gif":  true,
+		"image/webp": true,
+	}
+	return validTypes[mimeType]
+}
+
 // conversationToMessages creates a system prompt and a slice of input messages from a bot conversation.
 func conversationToMessages(conversation ai.BotConversation) (string, []InputMessage) {
 	systemMessage := ""
 	messages := make([]InputMessage, 0, len(conversation.Posts))
-	for _, post := range conversation.Posts {
-		previousRole := ""
-		previousContent := ""
-		if len(messages) > 0 {
-			previous := messages[len(messages)-1]
-			previousRole = previous.Role
-			previousContent = previous.Content
+
+	var currentBlocks []ContentBlock
+	var currentRole string
+
+	flushCurrentMessage := func() {
+		if len(currentBlocks) > 0 {
+			var content interface{}
+			if len(currentBlocks) == 1 && currentBlocks[0].Type == "text" {
+				content = currentBlocks[0].Text
+			} else {
+				content = currentBlocks
+			}
+			messages = append(messages, InputMessage{
+				Role:    currentRole,
+				Content: content,
+			})
+			currentBlocks = nil
 		}
+	}
+
+	for _, post := range conversation.Posts {
 		switch post.Role {
 		case ai.PostRoleSystem:
 			systemMessage += post.Message
+			continue
 		case ai.PostRoleBot:
-			if previousRole == RoleAssistant {
-				previousContent += post.Message
-				continue
+			if currentRole != RoleAssistant {
+				flushCurrentMessage()
+				currentRole = RoleAssistant
 			}
-			messages = append(messages,
-				InputMessage{
-					Role:    RoleAssistant,
-					Content: post.Message,
-				},
-			)
 		case ai.PostRoleUser:
-			if previousRole == RoleUser {
-				previousContent += post.Message
+			if currentRole != RoleUser {
+				flushCurrentMessage()
+				currentRole = RoleUser
+			}
+		default:
+			continue
+		}
+
+		// Handle text message
+		if post.Message != "" {
+			currentBlocks = append(currentBlocks, ContentBlock{
+				Type: "text",
+				Text: post.Message,
+			})
+		}
+
+		// Handle files/images
+		for _, file := range post.Files {
+			if !isValidImageType(file.MimeType) {
+				currentBlocks = append(currentBlocks, ContentBlock{
+					Type: "text",
+					Text: fmt.Sprintf("[Unsupported image type: %s]", file.MimeType),
+				})
 				continue
 			}
-			messages = append(messages,
-				InputMessage{
-					Role:    RoleUser,
-					Content: post.Message,
+
+			// Read image data
+			data, err := io.ReadAll(file.Reader)
+			if err != nil {
+				currentBlocks = append(currentBlocks, ContentBlock{
+					Type: "text",
+					Text: "[Error reading image data]",
+				})
+				continue
+			}
+
+			currentBlocks = append(currentBlocks, ContentBlock{
+				Type: "image",
+				Source: &ImageSource{
+					Type:      "base64",
+					MediaType: file.MimeType,
+					Data:      base64.StdEncoding.EncodeToString(data),
 				},
-			)
+			})
 		}
 	}
+	flushCurrentMessage()
 
 	return systemMessage, messages
 }
diff --git a/server/ai/anthropic/anthropic_test.go b/server/ai/anthropic/anthropic_test.go
new file mode 100644
index 00000000..42bcf28b
--- /dev/null
+++ b/server/ai/anthropic/anthropic_test.go
@@ -0,0 +1,222 @@
+package anthropic
+
+import (
+	"bytes"
+	"testing"
+
+	"github.com/mattermost/mattermost-plugin-ai/server/ai"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestConversationToMessages(t *testing.T) {
+	tests := []struct {
+		name         string
+		conversation ai.BotConversation
+		wantSystem   string
+		wantMessages []InputMessage
+	}{
+		{
+			name: "basic conversation with system message",
+			conversation: ai.BotConversation{
+				Posts: []ai.Post{
+					{Role: ai.PostRoleSystem, Message: "You are a helpful assistant"},
+					{Role: ai.PostRoleUser, Message: "Hello"},
+					{Role: ai.PostRoleBot, Message: "Hi there!"},
+				},
+			},
+			wantSystem: "You are a helpful assistant",
+			wantMessages: []InputMessage{
+				{Role: RoleUser, Content: "Hello"},
+				{Role: RoleAssistant, Content: "Hi there!"},
+			},
+		},
+		{
+			name: "multiple messages from same role",
+			conversation: ai.BotConversation{
+				Posts: []ai.Post{
+					{Role: ai.PostRoleUser, Message: "First message"},
+					{Role: ai.PostRoleUser, Message: "Second message"},
+					{Role: ai.PostRoleBot, Message: "First response"},
+					{Role: ai.PostRoleBot, Message: "Second response"},
+				},
+			},
+			wantSystem: "",
+			wantMessages: []InputMessage{
+				{Role: RoleUser, Content: []ContentBlock{
+					{Type: "text", Text: "First message"},
+					{Type: "text", Text: "Second message"},
+				}},
+				{Role: RoleAssistant, Content: []ContentBlock{
+					{Type: "text", Text: "First response"},
+					{Type: "text", Text: "Second response"},
+				}},
+			},
+		},
+		{
+			name: "conversation with image",
+			conversation: ai.BotConversation{
+				Posts: []ai.Post{
+					{Role: ai.PostRoleUser, Message: "Look at this:",
+						Files: []ai.File{
+							{
+								MimeType: "image/jpeg",
+								Reader:   bytes.NewReader([]byte("fake-image-data")),
+							},
+						}},
+					{Role: ai.PostRoleBot, Message: "I see the image"},
+				},
+			},
+			wantSystem: "",
+			wantMessages: []InputMessage{
+				{Role: RoleUser, Content: []ContentBlock{
+					{Type: "text", Text: "Look at this:"},
+					{
+						Type: "image",
+						Source: &ImageSource{
+							Type:      "base64",
+							MediaType: "image/jpeg",
+							Data:      "ZmFrZS1pbWFnZS1kYXRh", // base64 encoded "fake-image-data"
+						},
+					},
+				}},
+				{Role: RoleAssistant, Content: "I see the image"},
+			},
+		},
+		{
+			name: "unsupported image type",
+			conversation: ai.BotConversation{
+				Posts: []ai.Post{
+					{Role: ai.PostRoleUser, Files: []ai.File{
+						{
+							MimeType: "image/tiff",
+							Reader:   bytes.NewReader([]byte("fake-tiff-data")),
+						},
+					}},
+				},
+			},
+			wantSystem: "",
+			wantMessages: []InputMessage{
+				{Role: RoleUser, Content: "[Unsupported image type: image/tiff]"},
+			},
+		},
+		{
+			name: "complex back and forth with repeated roles",
+			conversation: ai.BotConversation{
+				Posts: []ai.Post{
+					{Role: ai.PostRoleUser, Message: "First question"},
+					{Role: ai.PostRoleBot, Message: "First answer"},
+					{Role: ai.PostRoleUser, Message: "Follow up 1"},
+					{Role: ai.PostRoleUser, Message: "Follow up 2"},
+					{Role: ai.PostRoleUser, Message: "Follow up 3"},
+					{Role: ai.PostRoleBot, Message: "Response 1"},
+					{Role: ai.PostRoleBot, Message: "Response 2"},
+					{Role: ai.PostRoleBot, Message: "Response 3"},
+					{Role: ai.PostRoleUser, Message: "Final question"},
+				},
+			},
+			wantSystem: "",
+			wantMessages: []InputMessage{
+				{Role: RoleUser, Content: "First question"},
+				{Role: RoleAssistant, Content: "First answer"},
+				{Role: RoleUser, Content: []ContentBlock{
+					{Type: "text", Text: "Follow up 1"},
+					{Type: "text", Text: "Follow up 2"},
+					{Type: "text", Text: "Follow up 3"},
+				}},
+				{Role: RoleAssistant, Content: []ContentBlock{
+					{Type: "text", Text: "Response 1"},
+					{Type: "text", Text: "Response 2"},
+					{Type: "text", Text: "Response 3"},
+				}},
+				{Role: RoleUser, Content: "Final question"},
+			},
+		},
+		{
+			name: "multiple roles with multiple images",
+			conversation: ai.BotConversation{
+				Posts: []ai.Post{
+					{Role: ai.PostRoleUser, Message: "Look at these images:",
+						Files: []ai.File{
+							{
+								MimeType: "image/jpeg",
+								Reader:   bytes.NewReader([]byte("image-1")),
+							},
+							{
+								MimeType: "image/png",
+								Reader:   bytes.NewReader([]byte("image-2")),
+							},
+						},
+					},
+					{Role: ai.PostRoleBot, Message: "I see them"},
+					{Role: ai.PostRoleUser, Message: "Here are more:",
+						Files: []ai.File{
+							{
+								MimeType: "image/webp",
+								Reader:   bytes.NewReader([]byte("image-3")),
+							},
+							{
+								MimeType: "image/tiff", // unsupported
+								Reader:   bytes.NewReader([]byte("image-4")),
+							},
+							{
+								MimeType: "image/gif",
+								Reader:   bytes.NewReader([]byte("image-5")),
+							},
+						},
+					},
+				},
+			},
+			wantSystem: "",
+			wantMessages: []InputMessage{
+				{Role: RoleUser, Content: []ContentBlock{
+					{Type: "text", Text: "Look at these images:"},
+					{
+						Type: "image",
+						Source: &ImageSource{
+							Type:      "base64",
+							MediaType: "image/jpeg",
+							Data:      "aW1hZ2UtMQ==", // base64 encoded "image-1"
+						},
+					},
+					{
+						Type: "image",
+						Source: &ImageSource{
+							Type:      "base64",
+							MediaType: "image/png",
+							Data:      "aW1hZ2UtMg==", // base64 encoded "image-2"
+						},
+					},
+				}},
+				{Role: RoleAssistant, Content: "I see them"},
+				{Role: RoleUser, Content: []ContentBlock{
+					{Type: "text", Text: "Here are more:"},
+					{
+						Type: "image",
+						Source: &ImageSource{
+							Type:      "base64",
+							MediaType: "image/webp",
+							Data:      "aW1hZ2UtMw==", // base64 encoded "image-3"
+						},
+					},
+					{Type: "text", Text: "[Unsupported image type: image/tiff]"},
+					{
+						Type: "image",
+						Source: &ImageSource{
+							Type:      "base64",
+							MediaType: "image/gif",
+							Data:      "aW1hZ2UtNQ==", // base64 encoded "image-5"
+						},
+					},
+				}},
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			gotSystem, gotMessages := conversationToMessages(tt.conversation)
+			assert.Equal(t, tt.wantSystem, gotSystem)
+			assert.Equal(t, tt.wantMessages, gotMessages)
+		})
+	}
+}
diff --git a/server/ai/anthropic/client.go b/server/ai/anthropic/client.go
index b1a7fd1a..83544dad 100644
--- a/server/ai/anthropic/client.go
+++ b/server/ai/anthropic/client.go
@@ -23,9 +23,21 @@ const (
 const RoleUser = "user"
 const RoleAssistant = "assistant"
 
+type ContentBlock struct {
+	Type   string       `json:"type"`
+	Text   string       `json:"text,omitempty"`
+	Source *ImageSource `json:"source,omitempty"`
+}
+
+type ImageSource struct {
+	Type      string `json:"type"`
+	MediaType string `json:"media_type"`
+	Data      string `json:"data"`
+}
+
 type InputMessage struct {
-	Role    string `json:"role"`
-	Content string `json:"content"`
+	Role    string      `json:"role"`
+	Content interface{} `json:"content"`
 }
 
 type RequestMetadata struct {
diff --git a/webapp/src/components/system_console/bot.tsx b/webapp/src/components/system_console/bot.tsx
index d4f16849..a6be138d 100644
--- a/webapp/src/components/system_console/bot.tsx
+++ b/webapp/src/components/system_console/bot.tsx
@@ -158,7 +158,7 @@ const Bot = (props: Props) => {
 						value={props.bot.customInstructions}
 						onChange={(e) => props.onChange({...props.bot, customInstructions: e.target.value})}
 					/>
-					{(props.bot.service.type === 'openai' || props.bot.service.type === 'openaicompatible' || props.bot.service.type === 'azure') && (
+					{(props.bot.service.type === 'openai' || props.bot.service.type === 'openaicompatible' || props.bot.service.type === 'azure' || props.bot.service.type === 'anthropic') && (
 						<>
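
Note (not part of the diff): a minimal, self-contained Go sketch of how the new InputMessage/ContentBlock types serialize, assuming the struct definitions from client.go above; the standalone main package and the sample values are illustrative only. A text-only message keeps Content as a plain JSON string, while a message carrying an image becomes an array of typed blocks, mirroring what flushCurrentMessage builds in anthropic.go.

package main

import (
	"encoding/base64"
	"encoding/json"
	"fmt"
)

// Types copied from the diff's client.go so the example compiles on its own.
type ContentBlock struct {
	Type   string       `json:"type"`
	Text   string       `json:"text,omitempty"`
	Source *ImageSource `json:"source,omitempty"`
}

type ImageSource struct {
	Type      string `json:"type"`
	MediaType string `json:"media_type"`
	Data      string `json:"data"`
}

type InputMessage struct {
	Role    string      `json:"role"`
	Content interface{} `json:"content"`
}

func main() {
	// A text-only message keeps Content as a plain string.
	textOnly := InputMessage{Role: "user", Content: "Hello"}

	// A message with an attached image uses a block array instead.
	withImage := InputMessage{
		Role: "user",
		Content: []ContentBlock{
			{Type: "text", Text: "Look at this:"},
			{
				Type: "image",
				Source: &ImageSource{
					Type:      "base64",
					MediaType: "image/jpeg",
					Data:      base64.StdEncoding.EncodeToString([]byte("fake-image-data")),
				},
			},
		},
	}

	// Print both JSON shapes to show the string-or-array content union.
	for _, m := range []InputMessage{textOnly, withImage} {
		b, err := json.MarshalIndent(m, "", "  ")
		if err != nil {
			panic(err)
		}
		fmt.Println(string(b))
	}
}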