Skip to content

Commit

Permalink
Experimental support for OpenAI reasoning models
Browse files Browse the repository at this point in the history
  • Loading branch information
bkrabach committed Nov 12, 2024
1 parent 17b7400 commit 94fa4e4
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 17 deletions.
72 changes: 55 additions & 17 deletions assistants/explorer-assistant/assistant/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from assistant_extensions.artifacts._model import ArtifactsConfigModel
from assistant_extensions.attachments import AttachmentsExtension
from content_safety.evaluators import CombinedContentSafetyEvaluator
from openai.types.chat import ChatCompletionMessageParam
from openai.types.chat import ChatCompletion, ChatCompletionMessageParam, ParsedChatCompletion
from semantic_workbench_api_model.workbench_model import (
AssistantStateEvent,
ConversationEvent,
Expand Down Expand Up @@ -215,12 +215,19 @@ async def respond_to_conversation(
# add the guardrails prompt to the system message content
system_message_content += f"\n\n{config.guardrails_prompt}"

completion_messages: list[ChatCompletionMessageParam] = [
{
# reasoning models do not support system messages, so set the role to "user" for the system message
completion_messages: list[ChatCompletionMessageParam] = []
if config.request_config.is_reasoning_model:
# if the model is a reasoning model, add the system message as a user message
completion_messages.append({
"role": "user",
"content": system_message_content,
})
else:
completion_messages.append({
"role": "system",
"content": system_message_content,
}
]
})

# generate the attachment messages from the attachment agent
attachment_messages = await attachments_extension.get_completion_messages_for_attachments(
Expand Down Expand Up @@ -269,6 +276,7 @@ async def respond_to_conversation(
# initialize variables for the response content and total tokens used
content = ""
completion_total_tokens = 0
completion: ParsedChatCompletion | ChatCompletion | None = None

# set default response message type
message_type = MessageType.chat
Expand Down Expand Up @@ -312,19 +320,49 @@ async def respond_to_conversation(

else:
# call the OpenAI API to generate a completion
completion = await client.chat.completions.create(
messages=completion_messages,
model=config.request_config.openai_model,
max_tokens=config.request_config.response_tokens,
)

content = completion.choices[0].message.content
try:
if config.request_config.is_reasoning_model:
# for reasoning models, use max_completion_tokens instead of max_tokens
completion = await client.chat.completions.create(
messages=completion_messages,
model=config.request_config.openai_model,
max_completion_tokens=config.request_config.response_tokens,
)
else:
completion = await client.chat.completions.create(
messages=completion_messages,
model=config.request_config.openai_model,
max_tokens=config.request_config.response_tokens,
)

content = completion.choices[0].message.content
except Exception as e:
logger.exception(f"exception occurred calling openai chat completion: {e}")
content = (
"An error occurred while calling the OpenAI API. Is it configured correctly?"
" View the debug inspector for more information."
)
message_type = MessageType.notice
deepmerge.always_merger.merge(
metadata,
{
"debug": {
method_metadata_key: {
"request": {
"model": config.request_config.openai_model,
"messages": completion_messages,
},
"error": str(e),
},
}
},
)

# get the total tokens used for the completion
completion_total_tokens = completion.usage.total_tokens if completion.usage else 0
footer_items = [
_get_token_usage_message(config.request_config.max_tokens, completion_total_tokens),
]
footer_items = []
if completion is not None:
# get the total tokens used for the completion
completion_total_tokens = completion.usage.total_tokens if completion.usage else 0
footer_items.append(_get_token_usage_message(config.request_config.max_tokens, completion_total_tokens))

# add the completion to the metadata for debugging
deepmerge.always_merger.merge(
Expand Down
8 changes: 8 additions & 0 deletions assistants/explorer-assistant/assistant/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,14 @@ class RequestConfig(BaseModel):
Field(title="OpenAI Model", description="The OpenAI model to use for generating responses."),
] = "gpt-4o"

is_reasoning_model: Annotated[
bool,
Field(
title="Is Reasoning Model (o1-preview, o1-mini, etc)",
description="Experimental: enable support for reasoning models such as o1-preview, o1-mini, etc.",
),
] = False


# the workbench app builds dynamic forms based on the configuration model and UI schema
class AssistantConfigModel(BaseModel):
Expand Down
3 changes: 3 additions & 0 deletions libraries/python/openai-client/openai_client/tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ def num_tokens_from_message(message: ChatCompletionMessageParam, model: str) ->
"gpt-4-32k-0613",
"gpt-4o-mini-2024-07-18",
"gpt-4o-2024-08-06",
# TODO: determine correct handling of reasoning models
"o1-preview",
"o1-mini",
}:
tokens_per_message = 3
tokens_per_name = 1
Expand Down

0 comments on commit 94fa4e4

Please sign in to comment.