Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add image support for Anthropic #271

Closed
wants to merge 10 commits into from
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -11,3 +11,5 @@ server/manifest.go

# VSCode
.vscode/
.aider*
.env
102 changes: 79 additions & 23 deletions server/ai/anthropic/anthropic.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
package anthropic

import (
"encoding/base64"
"fmt"
"io"
"net/http"

"github.com/mattermost/mattermost-plugin-ai/server/ai"
Expand All @@ -28,46 +30,100 @@ func New(llmService ai.ServiceConfig, httpClient *http.Client, metricsService me
}
}

// isValidImageType reports whether mimeType is one of the image media types
// supported by the Anthropic API: image/jpeg, image/png, image/gif, or
// image/webp.
//
// The comparison is an exact string match: values that differ in letter case
// or that carry parameters (for example "image/png; q=1") are rejected.
func isValidImageType(mimeType string) bool {
	// A switch over the constant set avoids building a map on every call.
	switch mimeType {
	case "image/jpeg", "image/png", "image/gif", "image/webp":
		return true
	default:
		return false
	}
}

// conversationToMessages creates a system prompt and a slice of input messages from a bot conversation.
//
// System posts are concatenated into the returned system prompt. Bot and user
// posts are accumulated as content blocks; consecutive posts that map to the
// same role are grouped into a single message, and posts with any other role
// are skipped. Each post contributes a text block for a non-empty Message and
// one block per attached file: a base64 image block for supported MIME types,
// or a bracketed text placeholder when the type is unsupported or the file
// data cannot be read.
//
// NOTE(review): this listing appears to be a diff rendering that shows removed
// and added lines together. The lines using previousRole/previousContent and
// the direct `messages = append(...)` calls with `Content: post.Message` look
// like the pre-change implementation; confirm against the actual file.
func conversationToMessages(conversation ai.BotConversation) (string, []InputMessage) {
systemMessage := ""
messages := make([]InputMessage, 0, len(conversation.Posts))
for _, post := range conversation.Posts {
previousRole := ""
previousContent := ""
if len(messages) > 0 {
previous := messages[len(messages)-1]
previousRole = previous.Role
previousContent = previous.Content

// Blocks collected so far for the message currently being built, and the
// role that message will be sent with.
var currentBlocks []ContentBlock
var currentRole string

// flushCurrentMessage appends the accumulated blocks to messages as one
// message for currentRole, then clears the accumulator. It does nothing when
// no blocks are pending. A lone text block is emitted as a plain string
// rather than as a one-element block list.
flushCurrentMessage := func() {
if len(currentBlocks) > 0 {
var content interface{}
if len(currentBlocks) == 1 && currentBlocks[0].Type == "text" {
content = currentBlocks[0].Text
} else {
content = currentBlocks
}
messages = append(messages, InputMessage{
Role: currentRole,
Content: content,
})
currentBlocks = nil
}
}

for _, post := range conversation.Posts {
switch post.Role {
case ai.PostRoleSystem:
// System posts never become messages; they only extend the system prompt.
systemMessage += post.Message
continue
case ai.PostRoleBot:
if previousRole == RoleAssistant {
previousContent += post.Message
continue
// A change of role closes the message being built and starts a new one.
if currentRole != RoleAssistant {
flushCurrentMessage()
currentRole = RoleAssistant
}
messages = append(messages,
InputMessage{
Role: RoleAssistant,
Content: post.Message,
},
)
case ai.PostRoleUser:
if previousRole == RoleUser {
previousContent += post.Message
// A change of role closes the message being built and starts a new one.
if currentRole != RoleUser {
flushCurrentMessage()
currentRole = RoleUser
}
default:
// Posts with any other role are ignored.
continue
}

// Handle text message: posts with an empty Message add no text block.
if post.Message != "" {
currentBlocks = append(currentBlocks, ContentBlock{
Type: "text",
Text: post.Message,
})
}

// Handle files/images: every attached file yields exactly one block, in order.
for _, file := range post.Files {
// Unsupported MIME types are replaced by a text placeholder instead of being dropped.
if !isValidImageType(file.MimeType) {
currentBlocks = append(currentBlocks, ContentBlock{
Type: "text",
Text: fmt.Sprintf("[Unsupported image type: %s]", file.MimeType),
})
continue
}
messages = append(messages,
InputMessage{
Role: RoleUser,
Content: post.Message,

// Read image data; the whole file is read before encoding. A read failure
// is reported in-band as a text placeholder rather than returned as an error.
data, err := io.ReadAll(file.Reader)
if err != nil {
currentBlocks = append(currentBlocks, ContentBlock{
Type: "text",
Text: "[Error reading image data]",
})
continue
}

// Supported images are embedded as standard-base64 data tagged with the file's MIME type.
currentBlocks = append(currentBlocks, ContentBlock{
Type: "image",
Source: &ImageSource{
Type: "base64",
MediaType: file.MimeType,
Data: base64.StdEncoding.EncodeToString(data),
},
)
})
}
}

// Emit whatever is still buffered for the final role.
flushCurrentMessage()
return systemMessage, messages
}

Expand Down
222 changes: 222 additions & 0 deletions server/ai/anthropic/anthropic_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
package anthropic

import (
"bytes"
"testing"

"github.com/mattermost/mattermost-plugin-ai/server/ai"
"github.com/stretchr/testify/assert"
)

func TestConversationToMessages(t *testing.T) {
tests := []struct {
name string
conversation ai.BotConversation
wantSystem string
wantMessages []InputMessage
}{
{
name: "basic conversation with system message",
conversation: ai.BotConversation{
Posts: []ai.Post{
{Role: ai.PostRoleSystem, Message: "You are a helpful assistant"},
{Role: ai.PostRoleUser, Message: "Hello"},
{Role: ai.PostRoleBot, Message: "Hi there!"},
},
},
wantSystem: "You are a helpful assistant",
wantMessages: []InputMessage{
{Role: RoleUser, Content: "Hello"},
{Role: RoleAssistant, Content: "Hi there!"},
},
},
{
name: "multiple messages from same role",
conversation: ai.BotConversation{
Posts: []ai.Post{
{Role: ai.PostRoleUser, Message: "First message"},
{Role: ai.PostRoleUser, Message: "Second message"},
{Role: ai.PostRoleBot, Message: "First response"},
{Role: ai.PostRoleBot, Message: "Second response"},
},
},
wantSystem: "",
wantMessages: []InputMessage{
{Role: RoleUser, Content: []ContentBlock{
{Type: "text", Text: "First message"},
{Type: "text", Text: "Second message"},
}},
{Role: RoleAssistant, Content: []ContentBlock{
{Type: "text", Text: "First response"},
{Type: "text", Text: "Second response"},
}},
},
},
{
name: "conversation with image",
conversation: ai.BotConversation{
Posts: []ai.Post{
{Role: ai.PostRoleUser, Message: "Look at this:",
Files: []ai.File{
{
MimeType: "image/jpeg",
Reader: bytes.NewReader([]byte("fake-image-data")),
},
}},
{Role: ai.PostRoleBot, Message: "I see the image"},
},
},
wantSystem: "",
wantMessages: []InputMessage{
{Role: RoleUser, Content: []ContentBlock{
{Type: "text", Text: "Look at this:"},
{
Type: "image",
Source: &ImageSource{
Type: "base64",
MediaType: "image/jpeg",
Data: "ZmFrZS1pbWFnZS1kYXRh", // base64 encoded "fake-image-data"
},
},
}},
{Role: RoleAssistant, Content: "I see the image"},
},
},
{
name: "unsupported image type",
conversation: ai.BotConversation{
Posts: []ai.Post{
{Role: ai.PostRoleUser, Files: []ai.File{
{
MimeType: "image/tiff",
Reader: bytes.NewReader([]byte("fake-tiff-data")),
},
}},
},
},
wantSystem: "",
wantMessages: []InputMessage{
{Role: RoleUser, Content: "[Unsupported image type: image/tiff]"},
},
},
{
name: "complex back and forth with repeated roles",
conversation: ai.BotConversation{
Posts: []ai.Post{
{Role: ai.PostRoleUser, Message: "First question"},
{Role: ai.PostRoleBot, Message: "First answer"},
{Role: ai.PostRoleUser, Message: "Follow up 1"},
{Role: ai.PostRoleUser, Message: "Follow up 2"},
{Role: ai.PostRoleUser, Message: "Follow up 3"},
{Role: ai.PostRoleBot, Message: "Response 1"},
{Role: ai.PostRoleBot, Message: "Response 2"},
{Role: ai.PostRoleBot, Message: "Response 3"},
{Role: ai.PostRoleUser, Message: "Final question"},
},
},
wantSystem: "",
wantMessages: []InputMessage{
{Role: RoleUser, Content: "First question"},
{Role: RoleAssistant, Content: "First answer"},
{Role: RoleUser, Content: []ContentBlock{
{Type: "text", Text: "Follow up 1"},
{Type: "text", Text: "Follow up 2"},
{Type: "text", Text: "Follow up 3"},
}},
{Role: RoleAssistant, Content: []ContentBlock{
{Type: "text", Text: "Response 1"},
{Type: "text", Text: "Response 2"},
{Type: "text", Text: "Response 3"},
}},
{Role: RoleUser, Content: "Final question"},
},
},
{
name: "multiple roles with multiple images",
conversation: ai.BotConversation{
Posts: []ai.Post{
{Role: ai.PostRoleUser, Message: "Look at these images:",
Files: []ai.File{
{
MimeType: "image/jpeg",
Reader: bytes.NewReader([]byte("image-1")),
},
{
MimeType: "image/png",
Reader: bytes.NewReader([]byte("image-2")),
},
},
},
{Role: ai.PostRoleBot, Message: "I see them"},
{Role: ai.PostRoleUser, Message: "Here are more:",
Files: []ai.File{
{
MimeType: "image/webp",
Reader: bytes.NewReader([]byte("image-3")),
},
{
MimeType: "image/tiff", // unsupported
Reader: bytes.NewReader([]byte("image-4")),
},
{
MimeType: "image/gif",
Reader: bytes.NewReader([]byte("image-5")),
},
},
},
},
},
wantSystem: "",
wantMessages: []InputMessage{
{Role: RoleUser, Content: []ContentBlock{
{Type: "text", Text: "Look at these images:"},
{
Type: "image",
Source: &ImageSource{
Type: "base64",
MediaType: "image/jpeg",
Data: "aW1hZ2UtMQ==", // base64 encoded "image-1"
},
},
{
Type: "image",
Source: &ImageSource{
Type: "base64",
MediaType: "image/png",
Data: "aW1hZ2UtMg==", // base64 encoded "image-2"
},
},
}},
{Role: RoleAssistant, Content: "I see them"},
{Role: RoleUser, Content: []ContentBlock{
{Type: "text", Text: "Here are more:"},
{
Type: "image",
Source: &ImageSource{
Type: "base64",
MediaType: "image/webp",
Data: "aW1hZ2UtMw==", // base64 encoded "image-3"
},
},
{Type: "text", Text: "[Unsupported image type: image/tiff]"},
{
Type: "image",
Source: &ImageSource{
Type: "base64",
MediaType: "image/gif",
Data: "aW1hZ2UtNQ==", // base64 encoded "image-5"
},
},
}},
},
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
gotSystem, gotMessages := conversationToMessages(tt.conversation)
assert.Equal(t, tt.wantSystem, gotSystem)
assert.Equal(t, tt.wantMessages, gotMessages)
})
}
}
16 changes: 14 additions & 2 deletions server/ai/anthropic/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,21 @@ const (
const RoleUser = "user"
const RoleAssistant = "assistant"

// ContentBlock is one element of a message's content list. Type selects the
// variant: the conversion code in this package emits "text" blocks, which
// carry Text, and "image" blocks, which carry Source. Text and Source are
// omitted from the JSON encoding when empty.
type ContentBlock struct {
Type string `json:"type"`
Text string `json:"text,omitempty"`
Source *ImageSource `json:"source,omitempty"`
}

// ImageSource describes the payload of an image content block. The
// conversion code in this package sets Type to "base64", MediaType to the
// attached file's MIME type, and Data to the standard-base64 encoding of the
// file's bytes.
type ImageSource struct {
Type string `json:"type"`
MediaType string `json:"media_type"`
Data string `json:"data"`
}

// InputMessage is a single role-tagged message in a request. Role is
// RoleUser or RoleAssistant. With the interface-typed Content field, the
// conversion code in this package stores either a plain string (a lone text
// block) or a []ContentBlock (anything else).
//
// NOTE(review): this listing appears to be a diff rendering that shows both
// the old field lines (Content as string) and the new ones (Content as
// interface{}); confirm against the actual file.
type InputMessage struct {
Role string `json:"role"`
Content string `json:"content"`
Role string `json:"role"`
Content interface{} `json:"content"`
}

type RequestMetadata struct {
Expand Down
2 changes: 1 addition & 1 deletion webapp/src/components/system_console/bot.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ const Bot = (props: Props) => {
value={props.bot.customInstructions}
onChange={(e) => props.onChange({...props.bot, customInstructions: e.target.value})}
/>
{(props.bot.service.type === 'openai' || props.bot.service.type === 'openaicompatible' || props.bot.service.type === 'azure') && (
{(props.bot.service.type === 'openai' || props.bot.service.type === 'openaicompatible' || props.bot.service.type === 'azure' || props.bot.service.type === 'anthropic') && (
<>
<BooleanItem
label={
Expand Down
Loading