From 29aa96aa377d73192d92968debfbc39809dc95e4 Mon Sep 17 00:00:00 2001
From: Mollie Munoz <mollie.munoz@microsoft.com>
Date: Wed, 13 Nov 2024 12:46:02 -0800
Subject: [PATCH] GC outline feedback bug fixes: Hello! every turn, Providing
 draft (#242)

This fixes two bugs:
Hello! every turn:
- Implements a run tracking approach at mode layer to inform gc call if
this is the first time user is calling or if user is returning.

Providing draft:
- improves instructions in gc config.

Also returns doc agents gc helper code to default for draft outline
mode. Decoupling to be fixed in separate task.
---
 .../gc_draft_outline_feedback_config.py       |  73 ++++++---
 .../agents/document/guided_conversation.py    |   9 +-
 .../assistant/agents/document_agent.py        | 142 +++++++++++++++---
 3 files changed, 184 insertions(+), 40 deletions(-)

diff --git a/assistants/prospector-assistant/assistant/agents/document/gc_draft_outline_feedback_config.py b/assistants/prospector-assistant/assistant/agents/document/gc_draft_outline_feedback_config.py
index 7034a47f..70128c60 100644
--- a/assistants/prospector-assistant/assistant/agents/document/gc_draft_outline_feedback_config.py
+++ b/assistants/prospector-assistant/assistant/agents/document/gc_draft_outline_feedback_config.py
@@ -17,40 +17,73 @@
 # It can also be thought of as a working memory for the agent.
 # We allow any valid Pydantic BaseModel class to be used.
 class ArtifactModel(BaseModel):
-    final_response: str = Field(description="The final response from the agent to the user.")
-    conversation_status: str = Field(description="The status of the conversation.")
-    user_decision: str = Field(description="The decision of the user on what should happen next.")
+    final_response: str = Field(
+        description="The final response from the agent to the user. You will update this field."
+    )
+    conversation_status: str = Field(
+        description="The status of the conversation. May be user_initiated, user_returned, or "
+        "user_completed. You are only allowed to update this field to user_completed, otherwise you will not update it."
+    )
+    user_decision: str = Field(
+        description="The decision of the user on what should happen next. May be update_outline or "
+        "draft_paper. You will update this field."
+    )
     filenames: str = Field(
-        description="Names of the available files currently uploaded as attachments. Information from the content of these files was used to help draft the outline under review."
+        description="Names of the available files currently uploaded as attachments. Information "
+        "from the content of these files was used to help draft the outline under review. You "
+        "CANNOT change this field."
+    )
+    current_outline: str = Field(
+        description="The most up-to-date version of the outline under review. You CANNOT change this field."
     )
-    current_outline: str = Field(description="The most up-to-date version of the outline under review.")
 
 
 # Rules - These are the do's and don'ts that the agent should follow during the conversation.
 rules = [
-    "Do NOT rewrite or update the outline, even if the user asks you to."
-    "You are ONLY allowed to help the user decide on any changes to the outline or answer questions about writing an outline."
+    "Do NOT rewrite or update the outline, even if the user asks you to.",
+    "Do NOT show the outline, unless the user asks you to.",
+    (
+        "You are ONLY allowed to help the user decide on any changes to the outline or answer questions "
+        "about writing an outline."
+    ),
+    (
+        "You are only allowed to update conversation_status to user_completed. All other values for that field"
+        " will be preset."
+    ),
+    (
+        "If the conversation_status is marked as user_completed, the final_response cannot be left as "
+        "Unanswered. The final_response must be set based on the conversation flow instructions."
+    ),
     "Terminate the conversation immediately if the user asks for harmful or inappropriate content.",
-    "If the conversation_status is marked as user_complete, the final_response cannot be left as 'Unanswered'. The final_response must be set based on the conversation flow instructions.",
 ]
 
 # Conversation Flow (optional) - This defines in natural language the steps of the conversation.
-conversation_flow = """1. Start by asking the user to review the drafted outline.
-2. Answer any questions about the outline or the drafting process the user might want to explore.
-3. At any time, if the user asks for a change to the outline or updates the attachment file list, the conversation_status must be
-marked as user_completed. The user_decision must be marked as update_outline. The final_response must inform the user that a new outline is being generated based off the new info or request.
-4. At any time, if the user is good with the outline in its current form and ready to move on to drafting a paper from it, the conversation_status must be
-marked as user_completed. The user_decision must be marked as draft_paper. The final_response must inform the user that you will start drafting the beginning of the
-document based on this outline.
+conversation_flow = """1. Start by asking the user to review the outline. The outline will have
+already been provided to the user. You do not provide the outline yourself unless the user
+specifically asks for it from you.
+2. Answer any questions about the outline or the drafting process the user inquires about.
+3. Use the following logic to fill in the artifact fields:
+a. At any time, if the user asks for a change to the outline, the conversation_status must be
+marked as user_completed. The user_decision must be marked as update_outline. The final_response
+must inform the user that a new outline is being generated based off the request.
+b. At any time, if the user has provided new attachments (detected via the filenames in the artifact),
+the conversation_status must be marked as user_completed. The user_decision must be marked as
+update_outline. The final_response must inform the user that a new outline is being generated based
+on the addition of new attachments.
+c. At any time, if the user is good with the outline in its current form and ready to move on to
+drafting a paper from it, the conversation_status must be marked as user_completed. The
+user_decision must be marked as draft_paper. The final_response must inform the user that you will
+start drafting the beginning of the document based on this outline.
 """
 
 # Context (optional) - This is any additional information or the circumstances the agent is in that it should be aware of.
 # It can also include the high level goal of the conversation if needed.
-context = """You are working with a user on drafting an outline. The current drafted outline is provided, along with any filenames
-that were used to help draft the outline. You do not have access to the content within the filenames that were used to help draft the outline.
- Your purpose here is to help the user decide on any changes to the outline they might want or answer questions about it.  This may be the first time
- the user is asking for you help, or the nth time.  Please use the conversation history provided to determine if you should be give an initial greeting
- to the user or continuing the draft outline process."""
+context = """You are working with a user on drafting an outline. The current drafted outline is
+provided, along with any filenames that were used to help draft the outline. You do not have access
+to the content within the filenames that were used to help draft the outline. Your purpose here is
+to help the user decide on any changes to the outline they might want or answer questions about it.
+This may be the first time the user is asking for you help (conversation_status is user_initiated),
+or the nth time (conversation_status is user_returned)."""
 
 
 # Resource Constraints (optional) - This defines the constraints on the conversation such as time or turns.
diff --git a/assistants/prospector-assistant/assistant/agents/document/guided_conversation.py b/assistants/prospector-assistant/assistant/agents/document/guided_conversation.py
index 8430d4ad..a638eac8 100644
--- a/assistants/prospector-assistant/assistant/agents/document/guided_conversation.py
+++ b/assistants/prospector-assistant/assistant/agents/document/guided_conversation.py
@@ -154,7 +154,11 @@ async def step_conversation(
         # should be returning str and Status for Document Agent to consume.  Update doc agent logic accordingly.
         status: Status = Status.UNDEFINED
         if conversation_status is not None:
-            if conversation_status == "Unanswered":
+            if (
+                conversation_status == "Unanswered"
+                or conversation_status == "user_initiated"  # highly coupled to config ...
+                or conversation_status == "user_returned"
+            ):
                 if result.ai_message is not None:
                     response = result.ai_message
                 else:
@@ -172,7 +176,8 @@ async def step_conversation(
                     elif user_decision == "draft_paper":
                         status = Status.USER_COMPLETED
                         next_step_name = (
-                            StepName.DP_DRAFT_CONTENT
+                            StepName.DO_FINISH  # temp for mode_draft_outline.
+                            # StepName.DP_DRAFT_CONTENT
                         )  # problem if in draft outline mode... that is supposed to go to DO_FINISH.
                         # coupling is now a problem.  and Need to fix the two locations for setting the branching/flow.
                     else:
diff --git a/assistants/prospector-assistant/assistant/agents/document_agent.py b/assistants/prospector-assistant/assistant/agents/document_agent.py
index 7a06b68e..967cca78 100644
--- a/assistants/prospector-assistant/assistant/agents/document_agent.py
+++ b/assistants/prospector-assistant/assistant/agents/document_agent.py
@@ -88,7 +88,7 @@ class Mode(BaseModel):
     name: ModeName = ModeName.UNDEFINED
     status: Status = Status.UNDEFINED
     step: Step = Step()
-    step_order: list[StepName] = []
+    step_order: list[dict[str, StepName | int]] = [{}]
 
     def _error_check(self) -> None:
         # name and status should either both be UNDEFINED or both be defined. Always.
@@ -140,10 +140,10 @@ def set_step(self, step: Step) -> None:
     def get_step(self) -> Step:
         return self.step
 
-    def set_step_order(self, steps: list[StepName]) -> None:
+    def set_step_order(self, steps: list[dict[str, StepName | int]]) -> None:
         self.step_order = steps
 
-    def get_step_order(self) -> list[StepName]:
+    def get_step_order(self) -> list[dict[str, StepName | int]]:
         return self.step_order
 
     def get_next_step(self) -> Step | None:
@@ -151,17 +151,39 @@ def get_next_step(self) -> Step | None:
         if len(steps) == 0:
             return None
 
+        # logic below is repetitive... needs clean up
         step = self.get_step()
         step_name = step.get_name()
-        if step_name is steps[-1]:
-            return None  # on final step
+        current_step_name = steps[-1].get("step_name")
+        if isinstance(current_step_name, StepName):
+            if current_step_name is step_name:
+                return None  # on final step
+        else:
+            logger.error("step_name is not StepName instance")
+            next_step_name = StepName.UNDEFINED
+            status = Status.UNDEFINED
+            return Step(name=next_step_name, status=status)
 
         for index, step in enumerate(steps[:-1]):
-            if step is step_name:
-                next_step_name = steps[index + 1]
+            current_step_name = step.get("step_name")
+            if isinstance(current_step_name, StepName):
+                if current_step_name is step_name:
+                    next_step_name = steps[index + 1].get("step_name")
+                    if isinstance(next_step_name, StepName):
+                        status = Status.INITIATED
+                        break
+                    else:
+                        logger.error("step_name not found in step of step_list")
+                        next_step_name = StepName.UNDEFINED
+                        status = Status.UNDEFINED
+                        break
+            else:
+                logger.error("step_name is not StepName instance")
+                next_step_name = StepName.UNDEFINED
+                status = Status.UNDEFINED
                 break
 
-        return Step(name=next_step_name, status=Status.INITIATED)
+        return Step(name=next_step_name, status=status)
 
 
 class State(BaseModel):
@@ -404,6 +426,39 @@ async def _run_mode(
                 logger.info("Document Agent in step: %s", step_name)
                 step_status, next_step_name = await step_method(config, context, message, metadata)
 
+                # Update run_count of step (This will need to be simplified--moved into its own function)
+                step_list = self._state.mode.get_step_order()
+                for step in step_list:
+                    list_step_name = step.get("step_name")
+                    if isinstance(list_step_name, StepName):
+                        if list_step_name is step_name:
+                            # This is bad... "run_count" and "step_name" dependent on implementation in a different function.  Need to cleanup.
+                            step_run_count = step.get("run_count")
+                            if isinstance(step_run_count, int):
+                                step_run_count += 1
+                                step["run_count"] = step_run_count
+                                self._state.mode.set_step_order(step_list)
+                                break  # done
+                            else:
+                                logger.error(
+                                    "Document Agent - step %s in step order does run_count not of type int.", step_name
+                                )
+                                self._state.mode.reset()
+                                self._write_state(context)
+                                return self._state.mode.get_status()  # problem
+                        else:
+                            # End of list
+                            if step is step_list[-1]:
+                                logger.error("Document Agent - step %s not found in step order.", step_name)
+                                self._state.mode.reset()
+                                self._write_state(context)
+                                return self._state.mode.get_status()  # problem
+                    else:
+                        logger.error("Document Agent - step_name of wrong type")
+                        self._state.mode.reset()
+                        self._write_state(context)
+                        return self._state.mode.get_status()  # problem
+
                 match step_status:  # resulting status of step_method()
                     case Status.NOT_COMPLETED:
                         self._state.mode.get_step().set_status(step_status)
@@ -492,14 +547,18 @@ async def _mode_draft_outline(
         if mode_status is Status.INITIATED:
             self._state.mode.set_step_order(
                 [
-                    StepName.DO_GC_ATTACHMENT_CHECK,
-                    StepName.DO_DRAFT_OUTLINE,
-                    StepName.DO_GC_GET_OUTLINE_FEEDBACK,
-                    StepName.DO_FINISH,
+                    {"step_name": StepName.DO_GC_ATTACHMENT_CHECK, "run_count": 0},
+                    {"step_name": StepName.DO_DRAFT_OUTLINE, "run_count": 0},
+                    {"step_name": StepName.DO_GC_GET_OUTLINE_FEEDBACK, "run_count": 0},
+                    {"step_name": StepName.DO_FINISH, "run_count": 0},
                 ],
             )
             logger.info("Document Agent mode (%s) at beginning.", mode_name)
-            first_step_name = self._state.mode.get_step_order()[0]
+            first_step_name = self._state.mode.get_step_order()[0].get("step_name")
+            if not isinstance(first_step_name, StepName):
+                logger.error("Document Agent: StepName could not be found in Mode's step order.")
+                self._state.mode.reset()
+                return self._state.mode.get_status()
             self._state.mode.set_step(Step(name=first_step_name, status=Status.INITIATED))
             self._write_state(context)
 
@@ -546,14 +605,18 @@ async def _mode_draft_paper(
         if mode_status is Status.INITIATED:
             self._state.mode.set_step_order(
                 [
-                    StepName.DO_GC_ATTACHMENT_CHECK,
-                    StepName.DO_DRAFT_OUTLINE,
-                    StepName.DO_GC_GET_OUTLINE_FEEDBACK,
-                    StepName.DP_DRAFT_CONTENT,
+                    {"step_name": StepName.DO_GC_ATTACHMENT_CHECK, "run_count": 0},
+                    {"step_name": StepName.DO_DRAFT_OUTLINE, "run_count": 0},
+                    {"step_name": StepName.DO_GC_GET_OUTLINE_FEEDBACK, "run_count": 0},
+                    {"step_name": StepName.DP_DRAFT_CONTENT, "run_count": 0},
                 ],
             )
             logger.info("Document Agent mode (%s) at beginning.", mode_name)
-            first_step_name = self._state.mode.get_step_order()[0]
+            first_step_name = self._state.mode.get_step_order()[0].get("step_name")
+            if not isinstance(first_step_name, StepName):
+                logger.error("Document Agent: StepName could not be found in Mode's step order.")
+                self._state.mode.reset()
+                return self._state.mode.get_status()
             self._state.mode.set_step(Step(name=first_step_name, status=Status.INITIATED))
             self._write_state(context)
 
@@ -909,6 +972,12 @@ async def _gc_outline_feedback(
     ) -> tuple[Status, StepName | None]:
         method_metadata_key = "document_agent_gc_outline_feedback"
 
+        # Pre-requisites
+        if self._state is None:
+            logger.error("Document Agent state is None. Returning.")
+            return Status.UNDEFINED, StepName.UNDEFINED
+
+        # Run
         if message is not None:
             user_message = message.content
         else:
@@ -924,6 +993,41 @@ async def _gc_outline_feedback(
         )
 
         # update artifact
+        # This step info approach is not cool. Rewriting code. Need to refactor.
+        step_name = self._state.mode.get_step().get_name()
+        step_list = self._state.mode.get_step_order()
+        for step in step_list:
+            list_step_name = step.get("step_name")
+            if isinstance(list_step_name, StepName):
+                if list_step_name is step_name:
+                    # This is bad... "run_count" and "step_name" dependent on implementation in a different function.  Need to cleanup.
+                    step_run_count = step.get("run_count")
+                    break
+                else:
+                    # End of list
+                    if step is step_list[-1]:
+                        logger.error("Document Agent - step %s not found in step order.", step_name)
+                        self._state.mode.reset()
+                        self._write_state(context)
+                        return self._state.mode.get_status(), StepName.UNDEFINED  # problem
+            else:
+                logger.error("Document Agent - step_name of wrong type")
+                self._state.mode.reset()
+                self._write_state(context)
+                return self._state.mode.get_status(), StepName.UNDEFINED  # problem
+
+        if not isinstance(step_run_count, int):
+            logger.error("Document Agent - step %s in step order does run_count not of type int.", step_name)
+            self._state.mode.reset()
+            self._write_state(context)
+            return self._state.mode.get_status(), StepName.UNDEFINED  # problem
+        else:
+            match step_run_count:
+                case 0:
+                    conversation_status_str = "user_initiated"
+                case _:
+                    conversation_status_str = "user_returned"
+
         filenames = await self._attachments_extension.get_attachment_filenames(
             context, config=config.agents_config.attachment_agent
         )
@@ -935,8 +1039,10 @@ async def _gc_outline_feedback(
 
         artifact_dict = guided_conversation.get_artifact_dict()
         if artifact_dict is not None:
+            artifact_dict["conversation_status"] = conversation_status_str
             artifact_dict["filenames"] = filenames_str
             artifact_dict["current_outline"] = outline_str
+
             guided_conversation.set_artifact_dict(artifact_dict)
         else:
             logger.error("artifact_dict unavailable.")