From da44dbbd33223f3b27f93bd1724727391db7e56b Mon Sep 17 00:00:00 2001
From: Brian Krabach
Date: Wed, 6 Nov 2024 09:41:03 -0800
Subject: [PATCH] explorer assistant now includes all chat history (not just last 100 messages) when generating a response (#222)

---
 .../explorer-assistant/assistant/chat.py      | 151 ++++++++++++------
 .../chat-driver/chat_driver/chat_driver.py    |   4 +-
 .../openai-client/openai_client/completion.py |   4 +-
 .../openai-client/openai_client/messages.py   |  22 +--
 4 files changed, 117 insertions(+), 64 deletions(-)

diff --git a/assistants/explorer-assistant/assistant/chat.py b/assistants/explorer-assistant/assistant/chat.py
index 74735ed5..363ea4e0 100644
--- a/assistants/explorer-assistant/assistant/chat.py
+++ b/assistants/explorer-assistant/assistant/chat.py
@@ -7,7 +7,7 @@
 import logging
 import re
-from typing import Any
+from typing import Any, Awaitable, Callable
 
 import deepmerge
 import openai_client
@@ -230,10 +230,6 @@ async def respond_to_conversation(
     # add the attachment messages to the completion messages
     completion_messages.extend(attachment_messages)
 
-    # get messages before the current message
-    messages_response = await context.get_messages(before=message.id)
-    messages = messages_response.messages + [message]
-
     # calculate the token count for the messages so far
     token_count = sum([
         openai_client.num_tokens_from_message(model=config.request_config.openai_model, message=completion_message)
@@ -243,49 +239,13 @@ async def respond_to_conversation(
     # calculate the total available tokens for the response generation
     available_tokens = config.request_config.max_tokens - config.request_config.response_tokens
 
-    # build the completion messages from the conversation history
-    history_messages: list[ChatCompletionMessageParam] = []
-
-    # add the messages in reverse order to get the most recent messages first
-    for message in reversed(messages):
-        chat_completion_messages: list[ChatCompletionMessageParam] = []
-
-        # add the message to the completion messages, treating any message from a source other than the assistant
-        # as a user message
-        if message.sender.participant_id == context.assistant.id:
-            chat_completion_messages.append({
-                "role": "assistant",
-                "content": _format_message(message, participants_response.participants),
-            })
-
-        else:
-            # we are working with the messages in reverse order, so include any attachments before the message
-            if message.filenames and len(message.filenames) > 0:
-                # add a system message to indicate the attachments
-                chat_completion_messages.append({
-                    "role": "system",
-                    "content": f"Attachment(s): {', '.join(message.filenames)}",
-                })
-
-            # add the user message to the completion messages
-            chat_completion_messages.append({
-                "role": "user",
-                "content": _format_message(message, participants_response.participants),
-            })
-
-        # calculate the token count for the messages and check if it exceeds the available tokens
-        token_count += sum([
-            openai_client.num_tokens_from_message(model=config.request_config.openai_model, message=message)
-            for message in chat_completion_messages
-        ])
-        if token_count > available_tokens:
-            # stop processing messages if the token count exceeds the available tokens
-            break
-
-        history_messages.extend(chat_completion_messages)
-
-    # reverse the history messages to get them back in the correct order
-    history_messages.reverse()
+    history_messages = await _get_history_messages(
+        context=context,
+        participants=participants_response.participants,
+        converter=_conversation_message_to_chat_completion_message_params,
+        model=config.request_config.openai_model,
+        token_limit=available_tokens - token_count,
+    )
 
     # add the history messages to the completion messages
     completion_messages.extend(history_messages)
@@ -487,8 +447,101 @@ async def respond_to_conversation(
 # region Helpers
 #
-
 # TODO: move to a common module, such as either the openai_client or attachment module for easy re-use in other assistants
+
+
+async def _conversation_message_to_chat_completion_message_params(
+    context: ConversationContext, message: ConversationMessage, participants: list[ConversationParticipant]
+) -> list[ChatCompletionMessageParam]:
+    """
+    Convert a conversation message to a list of chat completion message parameters.
+    """
+
+    # some messages may have multiple parts, such as a text message with an attachment
+    chat_completion_messages: list[ChatCompletionMessageParam] = []
+
+    # add the message to the completion messages, treating any message from a source other than the assistant
+    # as a user message
+    if message.sender.participant_id == context.assistant.id:
+        chat_completion_messages.append({
+            "role": "assistant",
+            "content": _format_message(message, participants),
+        })
+
+    else:
+        # add the user message to the completion messages
+        chat_completion_messages.append({
+            "role": "user",
+            "content": _format_message(message, participants),
+        })
+
+        if message.filenames and len(message.filenames) > 0:
+            # add a system message to indicate the attachments
+            chat_completion_messages.append({
+                "role": "system",
+                "content": f"Attachment(s): {', '.join(message.filenames)}",
+            })
+
+    return chat_completion_messages
+
+
+async def _get_history_messages(
+    context: ConversationContext,
+    participants: list[ConversationParticipant],
+    converter: Callable[
+        [ConversationContext, ConversationMessage, list[ConversationParticipant]],
+        Awaitable[list[ChatCompletionMessageParam]],
+    ],
+    model: str,
+    token_limit: int | None = None,
+) -> list[ChatCompletionMessageParam]:
+    """
+    Get all messages in the conversation, formatted for use in a completion.
+ """ + + # each call to get_messages will return a maximum of 100 messages + # so we need to loop until all messages are retrieved + # if token_limit is provided, we will stop when the token limit is reached + + history = [] + token_count = 0 + before_message_id = None + + while True: + # get the next batch of messages + messages_response = await context.get_messages(limit=100, before=before_message_id) + messages_list = messages_response.messages + + # if there are no more messages, break the loop + if not messages_list or messages_list.count == 0: + break + + # set the before_message_id for the next batch of messages + before_message_id = messages_list[0].id + + # messages are returned in reverse order, so we need to reverse them + for message in reversed(messages_list): + # format the message + formatted_message_list = await converter(context, message, participants) + + for formatted_message in formatted_message_list: + # calculate the token count for the message + try: + token_count += openai_client.num_tokens_from_message(model=model, message=formatted_message) + except Exception as e: + logger.exception(f"exception occurred calculating token count: {e}") + + # if a token limit is provided and the token count exceeds the limit, break the loop + if token_limit and token_count > token_limit: + break + + # insert the formatted messages into the beginning of the history list + history = formatted_message_list + history + + # return the formatted messages + return history + + def _get_token_usage_message( max_tokens: int, completion_total_tokens: int, diff --git a/libraries/python/chat-driver/chat_driver/chat_driver.py b/libraries/python/chat-driver/chat_driver/chat_driver.py index 0a0044ba..9da1016a 100644 --- a/libraries/python/chat-driver/chat_driver/chat_driver.py +++ b/libraries/python/chat-driver/chat_driver/chat_driver.py @@ -11,7 +11,7 @@ ChatCompletionUserMessageParam, ) from openai.types.chat.completion_create_params import ResponseFormat -from openai_client.completion import TEXT_RESPONSE_FORMAT, message_string_from_completion +from openai_client.completion import TEXT_RESPONSE_FORMAT, message_content_from_completion from openai_client.errors import CompletionError from openai_client.messages import MessageFormatter, format_with_dict from openai_client.tools import complete_with_tool_calls, function_list_to_tools, function_registry_to_tools @@ -198,7 +198,7 @@ async def respond( # Return the response. 
        return MessageEvent(
-            message=message_string_from_completion(completion) or None,
+            message=message_content_from_completion(completion) or None,
             metadata=metadata,
         )
 
diff --git a/libraries/python/openai-client/openai_client/completion.py b/libraries/python/openai-client/openai_client/completion.py
index dbbc3824..154d3cac 100644
--- a/libraries/python/openai-client/openai_client/completion.py
+++ b/libraries/python/openai-client/openai_client/completion.py
@@ -34,13 +34,13 @@ def message_from_completion(completion: ParsedChatCompletion) -> ParsedChatCompl
     return completion.choices[0].message if completion and completion.choices else None
 
 
-def message_string_from_completion(completion: ParsedChatCompletion | None) -> str:
+def message_content_from_completion(completion: ParsedChatCompletion | None) -> str:
     if not completion or not completion.choices or not completion.choices[0].message:
         return ""
     return completion.choices[0].message.content or ""
 
 
-def message_dict_from_completion(completion: ParsedChatCompletion) -> dict[str, Any] | None:
+def message_content_dict_from_completion(completion: ParsedChatCompletion) -> dict[str, Any] | None:
     message = message_from_completion(completion)
     if message:
         if message.parsed:
diff --git a/libraries/python/openai-client/openai_client/messages.py b/libraries/python/openai-client/openai_client/messages.py
index 01a77ba0..13b981ae 100644
--- a/libraries/python/openai-client/openai_client/messages.py
+++ b/libraries/python/openai-client/openai_client/messages.py
@@ -83,27 +83,27 @@ def apply_truncation_to_dict(dict_: dict, maximum_length: int, filler_text: str)
 MessageFormatter = Callable[[str, dict[str, Any]], str]
 
 
-def format_with_dict(message: str, vars: dict[str, Any]) -> str:
+def format_with_dict(text: str, vars: dict[str, Any]) -> str:
     """
-    Format a message with the given variables using the Python format method.
+    Format a string with the given variables using the Python format method.
     """
-    if message and vars:
+    if text and vars:
         for key, value in vars.items():
             try:
-                message = message.format(**{key: value})
+                text = text.format(**{key: value})
             except KeyError:
                 pass
-    return message
+    return text
 
 
-def format_with_liquid(message: str, vars: dict[str, Any]) -> str:
+def format_with_liquid(text: str, vars: dict[str, Any]) -> str:
     """
-    Format a message with the given variables using the Liquid template engine.
+    Format a string with the given variables using the Liquid template engine.
    """
-    out = message
-    if not message:
-        return message
-    template = Template(message)
+    out = text
+    if not text:
+        return text
+    template = Template(text)
     out = template.render(**vars)
     return out
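-- 

A minimal sketch of how the new _get_history_messages helper can be wired up with a
custom converter, assuming the imports already present in
assistants/explorer-assistant/assistant/chat.py; the converter and the token budget
below are illustrative, not part of this patch:

async def plain_text_converter(
    context: ConversationContext,
    message: ConversationMessage,
    participants: list[ConversationParticipant],
) -> list[ChatCompletionMessageParam]:
    # simplest possible converter: treat every message as a user message; the
    # converter in chat.py above additionally handles assistant messages and
    # notes attachments in a trailing system message
    return [{"role": "user", "content": message.content}]


async def build_history(context: ConversationContext, model: str) -> list[ChatCompletionMessageParam]:
    # fetch participants once and reuse them for every converted message
    participants_response = await context.get_participants()
    return await _get_history_messages(
        context=context,
        participants=participants_response.participants,
        converter=plain_text_converter,
        model=model,
        token_limit=8_000,  # illustrative budget
    )

Because the helper pages through the conversation 100 messages at a time and walks
each batch newest-first, the returned list stays in chronological order and the
oldest messages are the ones dropped once the token budget is exceeded.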