Improves form-filler instructions (microsoft#280)

Improves form-filler instructions: - improves ability to identify and resolve conflicts - improves ability to handle multiple files Extends form-field model: - to recognize "sections" in forms, for more context in extracted data, and improved populated-form rendering - renders description and instructions in the populated-form Additionally: - further moves away from "agent" name to "extension", by moving out of the agents directory - adds support for PNG files (screenshots, etc.) - adds timestamps to user messages for current-date awareness - fixes links in the app for already-copied conversations
momuno · Dec 10, 2024 · 7c8801c · 7c8801c
1 parent 47cdd69
commit 7c8801c
Show file tree

Hide file tree

Showing 18 changed files with 280 additions and 189 deletions.
diff --git a/...ospector-assistant/assistant/agents/form_fill_extension/steps/extract_form_fields_step.py b/...ospector-assistant/assistant/agents/form_fill_extension/steps/extract_form_fields_step.py
diff --git a/assistants/prospector-assistant/assistant/chat.py b/assistants/prospector-assistant/assistant/chat.py
@@ -14,6 +14,7 @@
 
 import deepmerge
 import openai_client
+from assistant_extensions.ai_clients.model import CompletionMessageImageContent
 from assistant_extensions.attachments import AttachmentsExtension
 from content_safety.evaluators import CombinedContentSafetyEvaluator
 from openai.types.chat import ChatCompletionMessageParam
@@ -37,8 +38,8 @@
 from . import legacy
 from .agents.artifact_agent import Artifact, ArtifactAgent, ArtifactConversationInspectorStateProvider
 from .agents.document_agent import DocumentAgent
-from .agents.form_fill_extension import FormFillExtension, LLMConfig
 from .config import AssistantConfigModel
+from .form_fill_extension import FormFillExtension, LLMConfig
 
 logger = logging.getLogger(__name__)
 
@@ -131,8 +132,8 @@ async def on_chat_message_created(
       - @assistant.events.conversation.message.on_created
     """
 
-    # update the participant status to indicate the assistant is thinking
-    async with send_error_message_on_exception(context), context.set_status("thinking..."):
+    # update the participant status to indicate the assistant is responding
+    async with send_error_message_on_exception(context), context.set_status("responding..."):
         #
         # NOTE: we're experimenting with agents, if they are enabled, use them to respond to the conversation
         #
@@ -183,7 +184,7 @@ async def on_conversation_created(context: ConversationContext) -> None:
 
 
 async def welcome_message_form_fill(context: ConversationContext) -> None:
-    async with send_error_message_on_exception(context), context.set_status("thinking..."):
+    async with send_error_message_on_exception(context), context.set_status("responding..."):
         await form_fill_execute(context, None)
 
 
@@ -193,7 +194,7 @@ async def welcome_message_create_document(
     message: ConversationMessage | None,
     metadata: dict[str, Any],
 ) -> None:
-    async with send_error_message_on_exception(context), context.set_status("thinking..."):
+    async with send_error_message_on_exception(context), context.set_status("responding..."):
         await create_document_execute(config, context, message, metadata)
 
 
@@ -223,6 +224,7 @@ async def form_fill_execute(context: ConversationContext, message: ConversationM
     Execute the form fill agent to respond to the conversation message.
     """
     config = await assistant_config.get(context.assistant)
+    participants = await context.get_participants(include_inactive=True)
     await form_fill_extension.execute(
         llm_config=LLMConfig(
             openai_client_factory=lambda: openai_client.create_client(config.service_config),
@@ -231,7 +233,7 @@ async def form_fill_execute(context: ConversationContext, message: ConversationM
         ),
         config=config.agents_config.form_fill_agent,
         context=context,
-        latest_user_message=message.content if message else None,
+        latest_user_message=_format_message(message, participants.participants) if message else None,
         latest_attachment_filenames=message.filenames if message else [],
         get_attachment_content=form_fill_extension_get_attachment(context, config),
     )
@@ -251,8 +253,26 @@ async def get(filename: str) -> str:
         if not messages:
             return ""
 
-        # filter down to the messages that contain the attachment (ie. don't include the system messages)
-        return "\n\n".join((str(message.content) for message in messages if "<ATTACHMENT>" in str(message.content)))
+        # filter down to the message with the attachment
+        user_message = next(
+            (message for message in messages if "<ATTACHMENT>" in str(message)),
+            None,
+        )
+        if not user_message:
+            return ""
+
+        content = user_message.content
+        match content:
+            case str():
+                return content
+
+            case list():
+                for part in content:
+                    match part:
+                        case CompletionMessageImageContent():
+                            return part.data
+
+        return ""
 
     return get
 

diff --git a/assistants/prospector-assistant/assistant/config.py b/assistants/prospector-assistant/assistant/config.py
@@ -8,7 +8,7 @@
 
 from . import helpers
 from .agents.artifact_agent import ArtifactAgentConfigModel
-from .agents.form_fill_extension import FormFillConfig
+from .form_fill_extension import FormFillConfig
 
 # The semantic workbench app uses react-jsonschema-form for rendering
 # dynamic configuration forms based on the configuration model and UI schema

diff --git a/...nt/agents/form_fill_extension/__init__.py → ...assistant/form_fill_extension/__init__.py b/...nt/agents/form_fill_extension/__init__.py → ...assistant/form_fill_extension/__init__.py
diff --git a/...tant/agents/form_fill_extension/config.py → ...t/assistant/form_fill_extension/config.py b/...tant/agents/form_fill_extension/config.py → ...t/assistant/form_fill_extension/config.py
diff --git a/...t/agents/form_fill_extension/extension.py → ...ssistant/form_fill_extension/extension.py b/...t/agents/form_fill_extension/extension.py → ...ssistant/form_fill_extension/extension.py
@@ -78,31 +78,35 @@ def build_step_context(config: ConfigT) -> Context[ConfigT]:
 
                     case state.FormFillExtensionMode.extract_form_fields:
                         file_content = await get_attachment_content(agent_state.form_filename)
+                        attachment = UserAttachment(filename=agent_state.form_filename, content=file_content)
                         result = await extract_form_fields_step.execute(
                             step_context=build_step_context(config.extract_form_fields_config),
-                            file_content=file_content,
+                            potential_form_attachment=attachment,
                         )
 
                         match result:
                             case extract_form_fields_step.CompleteResult():
-                                await _send_message(context, result.message, result.debug)
+                                await _send_message(context, result.message, result.debug, MessageType.notice)
 
-                                agent_state.extracted_form_title = result.extracted_form_title
-                                agent_state.extracted_form_fields = result.extracted_form_fields
+                                agent_state.extracted_form = result.extracted_form
                                 agent_state.mode = state.FormFillExtensionMode.fill_form_step
 
                                 continue
 
                             case _:
                                 await _handle_incomplete_result(context, result)
+
+                                agent_state.mode = state.FormFillExtensionMode.acquire_form_step
                                 return
 
                     case state.FormFillExtensionMode.fill_form_step:
+                        if agent_state.extracted_form is None:
+                            raise ValueError("extracted_form is None")
+
                         result = await fill_form_step.execute(
                             step_context=build_step_context(config.fill_form_config),
                             form_filename=agent_state.form_filename,
-                            form_title=agent_state.extracted_form_title,
-                            form_fields=agent_state.extracted_form_fields,
+                            form=agent_state.extracted_form,
                         )
 
                         match result:
@@ -143,14 +147,16 @@ async def _handle_incomplete_result(context: ConversationContext, result: Incomp
             raise ValueError(f"Unexpected incomplete result type: {result}")
 
 
-async def _send_message(context: ConversationContext, message: str, debug: dict) -> None:
+async def _send_message(
+    context: ConversationContext, message: str, debug: dict, message_type: MessageType = MessageType.chat
+) -> None:
     if not message:
         return
 
     await context.send_messages(
         NewConversationMessage(
             content=message,
-            message_type=MessageType.chat,
+            message_type=message_type,
             debug_data=debug,
         )
     )

diff --git a/...t/agents/form_fill_extension/inspector.py → ...ssistant/form_fill_extension/inspector.py b/...t/agents/form_fill_extension/inspector.py → ...ssistant/form_fill_extension/inspector.py
diff --git a/...stant/agents/form_fill_extension/state.py → ...nt/assistant/form_fill_extension/state.py b/...stant/agents/form_fill_extension/state.py → ...nt/assistant/form_fill_extension/state.py
@@ -39,6 +39,21 @@ class FormField(BaseModel):
     )
 
 
+class Section(BaseModel):
+    title: str = Field(description="The title of the section if one is provided on the form.")
+    description: str = Field(description="The description of the section if one is provided on the form.")
+    instructions: str = Field(description="The instructions for the section if they are provided on the form.")
+    fields: list[FormField] = Field(description="The fields of the section.")
+
+
+class Form(BaseModel):
+    title: str = Field(description="The title of the form.")
+    description: str = Field(description="The description of the form if one is provided on the form.")
+    instructions: str = Field(description="The instructions for the form if they are provided on the form.")
+    fields: list[FormField] = Field(description="The fields of the form, if there are any at the top level.")
+    sections: list[Section] = Field(description="The sections of the form, if there are any.")
+
+
 class FormFillExtensionMode(StrEnum):
     acquire_form_step = "acquire_form"
     extract_form_fields = "extract_form_fields"
@@ -49,8 +64,7 @@ class FormFillExtensionMode(StrEnum):
 class FormFillExtensionState(BaseModel):
     mode: FormFillExtensionMode = FormFillExtensionMode.acquire_form_step
     form_filename: str = ""
-    extracted_form_title: str = ""
-    extracted_form_fields: list[FormField] = []
+    extracted_form: Form | None = None
     populated_form_markdown: str = ""
     fill_form_gc_artifact: dict | None = None
 
@@ -81,4 +95,4 @@ async def extension_state(context: ConversationContext) -> AsyncIterator[FormFil
         current_state.set(None)
 
 
-inspector = FileStateInspector(display_name="FormFill Agent", file_path_source=path_for_state)
+inspector = FileStateInspector(display_name="Debug: FormFill Agent", file_path_source=path_for_state)
diff --git a/...nts/form_fill_extension/steps/__init__.py → ...ant/form_fill_extension/steps/__init__.py b/...nts/form_fill_extension/steps/__init__.py → ...ant/form_fill_extension/steps/__init__.py
diff --git a/...l_extension/steps/_guided_conversation.py → ...l_extension/steps/_guided_conversation.py b/...l_extension/steps/_guided_conversation.py → ...l_extension/steps/_guided_conversation.py
diff --git a/.../agents/form_fill_extension/steps/_llm.py → ...sistant/form_fill_extension/steps/_llm.py b/.../agents/form_fill_extension/steps/_llm.py → ...sistant/form_fill_extension/steps/_llm.py
diff --git a/...fill_extension/steps/acquire_form_step.py → ...fill_extension/steps/acquire_form_step.py b/...fill_extension/steps/acquire_form_step.py → ...fill_extension/steps/acquire_form_step.py
@@ -28,7 +28,6 @@ def extend(app: AssistantAppProtocol) -> None:
 
 
 class FormArtifact(BaseModel):
-    title: str = Field(description="The title of the form.", default="")
     filename: str = Field(description="The filename of the form.", default="")
 
 
@@ -40,11 +39,9 @@ class FormArtifact(BaseModel):
     ],
     conversation_flow=dedent("""
         1. Inform the user that our goal is to help the user fill out a form.
-        2. Ask the user to provide a file that contains a form. The file can be PDF, TXT, or DOCX.
-        3. When you receive a file, determine if the file looks to be a form.
-        4. If the file is not a form, inform the user that the file is not a form. Ask them to provide a different file.
-        5. If the form is a file, update the artifcat with the title and filename of the form.
-        6. Inform the user that you will now extract the form fields, so that you can assist them in filling it out.
+        2. Ask the user to provide a file that contains a form. The file can be PDF, TXT, DOCX, or PNG.
+        3. When you receive a file, set the filename field in the artifact.
+        4. Inform the user that you will now extract the form fields, so that you can assist them in filling it out.
     """).strip(),
     context="",
     resource_constraint=ResourceConstraintDefinition(
@@ -116,15 +113,15 @@ def _get_state_file_path(context: ConversationContext) -> Path:
 
 
 _inspector = FileStateInspector(
-    display_name="Acquire-Form Guided-Conversation",
+    display_name="Debug: Acquire-Form Guided-Conversation",
     file_path_source=_get_state_file_path,
 )
 
 
 async def input_to_message(input: UserInput) -> str | None:
     attachments = []
     async for attachment in input.attachments:
-        attachments.append(attachment.content)
+        attachments.append(f"<ATTACHMENT>{attachment.filename}</ATTACHMENT>")
 
     if not attachments:
         return input.message