Skip to content

Commit

Permalink
Includes attachment extraction errors in chat completion messages (mi…
Browse files Browse the repository at this point in the history
…crosoft#125)

To give chat-completion context about why a file does not have content,
so it doesn't make something up when asked questions about it.

Additionally:
- places a lock around attachment processing so two different events
(file.created and message.created, for example) don't result in the
attachment being processed twice in parallel
- deletes unused, premature assistant capabilities
- clears file input on message send, so you can send subsequent messages
with the same attachment, which is particularly helpful when testing
  • Loading branch information
markwaddle authored Oct 16, 2024
1 parent c2e51aa commit 56b4d62
Show file tree
Hide file tree
Showing 10 changed files with 97 additions and 84 deletions.
5 changes: 0 additions & 5 deletions assistants/prospector-assistant/assistant/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
)
from semantic_workbench_assistant.assistant_app import (
AssistantApp,
AssistantCapability,
BaseModelAssistantConfig,
ContentSafety,
ContentSafetyEvaluator,
Expand Down Expand Up @@ -72,10 +71,6 @@ async def content_evaluator_factory(context: ConversationContext) -> ContentSafe
assistant_service_id=service_id,
assistant_service_name=service_name,
assistant_service_description=service_description,
capabilities={
AssistantCapability.supports_conversation_messages_chat,
AssistantCapability.supports_conversation_messages_command,
},
config_provider=assistant_config.provider,
content_interceptor=content_safety,
inspector_state_providers={
Expand Down
5 changes: 0 additions & 5 deletions assistants/skill-assistant/assistant/skill_assistant.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
)
from semantic_workbench_assistant.assistant_app import (
AssistantApp,
AssistantCapability,
BaseModelAssistantConfig,
ContentSafety,
ContentSafetyEvaluator,
Expand Down Expand Up @@ -62,10 +61,6 @@ async def content_evaluator_factory(context: ConversationContext) -> ContentSafe
assistant_service_id=service_id,
assistant_service_name=service_name,
assistant_service_description=service_description,
capabilities={
AssistantCapability.supports_conversation_messages_chat,
AssistantCapability.supports_conversation_messages_command,
},
config_provider=assistant_config.provider,
content_interceptor=content_safety,
)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import asyncio
import contextlib
import datetime
import io
import logging
from typing import Any, Awaitable, Callable, Sequence

from assistant_drive import Drive, DriveConfig
from assistant_drive import Drive, DriveConfig, IfDriveFileExistsBehavior
from openai.types import chat
from semantic_workbench_api_model.workbench_model import (
ConversationEvent,
Expand All @@ -25,20 +25,20 @@
logger = logging.getLogger(__name__)


AttachmentProcessingErrorHandler = Callable[[ConversationContext, Exception], Awaitable]
AttachmentProcessingErrorHandler = Callable[[ConversationContext, str, Exception], Awaitable]


async def log_and_send_message_on_error(context: ConversationContext, e: Exception) -> None:
async def log_and_send_message_on_error(context: ConversationContext, filename: str, e: Exception) -> None:
"""
Default error handler for attachment processing, which logs the exception and sends
a message to the conversation.
"""

logger.exception("exception occurred processing attachment")
logger.exception("exception occurred processing attachment", exc_info=e)
await context.send_messages(
NewConversationMessage(
content=f"There was an error processing the attachment: {e}",
message_type=MessageType.chat,
content=f"There was an error processing the attachment ({filename}): {e}",
message_type=MessageType.notice,
metadata={"attribution": "system"},
)
)
Expand All @@ -47,6 +47,7 @@ async def log_and_send_message_on_error(context: ConversationContext, e: Excepti
attachment_tag = "ATTACHMENT"
filename_tag = "FILENAME"
content_tag = "CONTENT"
error_tag = "ERROR"
image_tag = "IMAGE"


Expand Down Expand Up @@ -115,7 +116,7 @@ async def on_file_deleted(context: ConversationContext, event: ConversationEvent
"""

# delete the attachment for the file
_delete_attachment_for_file(context, file)
await _delete_attachment_for_file(context, file)

async def get_completion_messages_for_attachments(
self,
Expand Down Expand Up @@ -162,7 +163,8 @@ async def get_completion_messages_for_attachments(

# process each attachment
for attachment in attachments:
content = f"<{attachment_tag}><{filename_tag}>{attachment.filename}</{filename_tag}><{content_tag}>{attachment.content}</{content_tag}></{attachment_tag}>"
error_element = f"<{error_tag}>{attachment.error}</{error_tag}>" if attachment.error else ""
content = f"<{attachment_tag}><{filename_tag}>{attachment.filename}</{filename_tag}>{error_element}<{content_tag}>{attachment.content}</{content_tag}></{attachment_tag}>"

# if the content is a data URI, include it as an image type within the message content
if attachment.content.startswith("data:image/"):
Expand Down Expand Up @@ -217,12 +219,12 @@ async def _get_attachments(

# delete cached attachments that are no longer in the conversation
filenames = {file.filename for file in files_response.files}
_delete_attachments_not_in(context, filenames)
await _delete_attachments_not_in(context, filenames)

return attachments


def _delete_attachments_not_in(context: ConversationContext, filenames: set[str]) -> None:
async def _delete_attachments_not_in(context: ConversationContext, filenames: set[str]) -> None:
"""Deletes cached attachments that are not in the filenames argument."""
drive = _attachment_drive_for_context(context)
for filename in drive.list():
Expand All @@ -232,6 +234,33 @@ def _delete_attachments_not_in(context: ConversationContext, filenames: set[str]
with contextlib.suppress(FileNotFoundError):
drive.delete(filename)

await _delete_lock_for_context_file(context, filename)


_file_locks_lock = asyncio.Lock()
_file_locks: dict[str, asyncio.Lock] = {}


async def _delete_lock_for_context_file(context: ConversationContext, filename: str) -> None:
async with _file_locks_lock:
key = f"{context.assistant.id}/{context.id}/{filename}"
if key not in _file_locks:
return

del _file_locks[key]


async def _lock_for_context_file(context: ConversationContext, filename: str) -> asyncio.Lock:
"""
Get a lock for the given file in the given context.
"""
async with _file_locks_lock:
key = f"{context.assistant.id}/{context.id}/{filename}"
if key not in _file_locks:
_file_locks[key] = asyncio.Lock()

return _file_locks[key]


async def _get_attachment_for_file(
context: ConversationContext, file: File, metadata: dict[str, Any], error_handler: AttachmentProcessingErrorHandler
Expand All @@ -243,38 +272,49 @@ async def _get_attachment_for_file(
"""
drive = _attachment_drive_for_context(context)

with contextlib.suppress(FileNotFoundError):
attachment = drive.read_model(Attachment, file.filename)
# ensure that only one async task is updating the attachment for the file
file_lock = await _lock_for_context_file(context, file.filename)
async with file_lock:
with contextlib.suppress(FileNotFoundError):
attachment = drive.read_model(Attachment, file.filename)

if attachment.updated_datetime.astimezone(datetime.UTC) >= file.updated_datetime.astimezone(datetime.UTC):
# if the attachment is up-to-date, return it
return attachment
if attachment.updated_datetime.timestamp() >= file.updated_datetime.timestamp():
# if the attachment is up-to-date, return it
return attachment

# process the file to create an attachment
async with context.set_status_for_block(f"updating attachment {file.filename} ..."):
content = ""
try:
# read the content of the file
file_bytes = await _read_conversation_file(context, file)
# convert the content of the file to a string
content = await convert.bytes_to_str(file_bytes, filename=file.filename)
except Exception as e:
await error_handler(context, e)

attachment = Attachment(
filename=file.filename, content=content, metadata=metadata, updated_datetime=file.updated_datetime
)
drive.write_model(attachment, file.filename)
error = ""
# process the file to create an attachment
async with context.set_status_for_block(f"updating attachment {file.filename} ..."):
try:
# read the content of the file
file_bytes = await _read_conversation_file(context, file)
# convert the content of the file to a string
content = await convert.bytes_to_str(file_bytes, filename=file.filename)
except Exception as e:
await error_handler(context, file.filename, e)
error = f"error processing file: {e}"

attachment = Attachment(
filename=file.filename,
content=content,
metadata=metadata,
updated_datetime=file.updated_datetime,
error=error,
)
drive.write_model(attachment, file.filename, if_exists=IfDriveFileExistsBehavior.OVERWRITE)

return attachment
return attachment


def _delete_attachment_for_file(context: ConversationContext, file: File) -> None:
async def _delete_attachment_for_file(context: ConversationContext, file: File) -> None:
drive = _attachment_drive_for_context(context)

with contextlib.suppress(FileNotFoundError):
drive.delete(file.filename)

await _delete_lock_for_context_file(context, file.filename)


def _attachment_drive_for_context(context: ConversationContext) -> Drive:
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,29 +16,22 @@ async def bytes_to_str(file_bytes: bytes, filename: str) -> str:
"""
filename_extension = pathlib.Path(filename).suffix.lower().strip(".")

try:
match filename_extension:
# if the file has .docx extension, convert it to text
case "docx":
content = await _docx_bytes_to_str(file_bytes)

# if the file has .pdf extension, convert it to text
case "pdf":
content = await _pdf_bytes_to_str(file_bytes)

# if the file has an image extension, convert it to a data URI
case _ if filename_extension in ["png", "jpg", "jpeg", "gif", "bmp", "tiff", "tif"]:
content = _image_bytes_to_str(file_bytes, filename_extension)

# otherwise, try to convert the file to text
case _:
content = file_bytes.decode("utf-8")

except Exception:
logger.exception("error converting %s to text", filename)
content = ""

return content
match filename_extension:
# if the file has .docx extension, convert it to text
case "docx":
return await _docx_bytes_to_str(file_bytes)

# if the file has .pdf extension, convert it to text
case "pdf":
return await _pdf_bytes_to_str(file_bytes)

# if the file has an image extension, convert it to a data URI
case _ if filename_extension in ["png", "jpg", "jpeg", "gif", "bmp", "tiff", "tif"]:
return _image_bytes_to_str(file_bytes, filename_extension)

# otherwise, try to convert the file to text
case _:
return file_bytes.decode("utf-8")


async def _docx_bytes_to_str(file_bytes: bytes) -> str:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ class AttachmentsConfigModel(BaseModel):

class Attachment(BaseModel):
filename: str
content: str
metadata: dict[str, Any]
content: str = ""
error: str = ""
metadata: dict[str, Any] = {}
updated_datetime: datetime.datetime = Field(default=datetime.datetime.fromtimestamp(0, datetime.timezone.utc))
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def __init__(
assistant_service_name: str,
assistant_service_description: str,
assistant_service_metadata: dict[str, Any] = {},
capabilities: set[AssistantCapability] = {AssistantCapability.supports_conversation_messages_chat},
capabilities: set[AssistantCapability] = set(),
config_provider: AssistantConfigProvider = BaseModelAssistantConfig(EmptyConfigModel).provider,
data_exporter: AssistantDataExporter = FileStorageAssistantDataExporter(),
conversation_data_exporter: ConversationDataExporter = FileStorageConversationDataExporter(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -270,15 +270,3 @@ class AssistantCapability(StrEnum):

supports_conversation_files = "supports_conversation_files"
"""Advertise support for awareness of files in the conversation."""

supports_conversation_messages_directed_at = "supports_conversation_messages_directed_at"
"""
Advertise support for the directed_at attribute in message metadata, and only respond to
messages that are directed to the assistant.
"""

supports_conversation_messages_chat = "supports_conversation_messages_chat"
"""Advertise support for responding to messages of type 'chat'."""

supports_conversation_messages_command = "supports_conversation_messages_command"
"""Advertise support for responding to messages of type 'command'."""
4 changes: 4 additions & 0 deletions workbench-app/src/components/Conversations/InteractInput.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,10 @@ export const InteractInput: React.FC<InteractInputProps> = (props) => {
const files = attachmentFiles.size > 0 ? [...attachmentFiles.values()] : undefined;
// reset the attachment files so that the same files are not uploaded again
setAttachmentFiles(new Map());
// reset the files form input
if (attachmentInputRef.current) {
attachmentInputRef.current.value = '';
}
if (files) {
await uploadConversationFiles({ conversationId, files });
}
Expand Down
2 changes: 1 addition & 1 deletion workbench-app/src/libs/useAssistantCapabilities.ts
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ export function useGetAssistantCapabilitiesSet(assistants: Assistant[]) {
const metadataCapabilities = info.metadata?.capabilities;

// If there are no capabilities specified at all, default to all capabilities
if (metadataCapabilities === undefined || Object.keys(metadataCapabilities).length === 0) {
if (metadataCapabilities === undefined) {
acc.union(allCapabilities);
return acc;
}
Expand Down
3 changes: 0 additions & 3 deletions workbench-app/src/models/AssistantCapability.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,3 @@
export enum AssistantCapability {
SupportsConversationFiles = 'supports_conversation_files',
SupportsConversationMessagesDirectedAt = 'supports_conversation_messages_directed_at',
SupportsConversationMessagesChat = 'supports_conversation_messages_chat',
SupportsConversationMessagesCommand = 'supports_conversation_messages_command',
}

0 comments on commit 56b4d62

Please sign in to comment.