From 34d1002d3d167f1d99d044af68c06fe1f2aa6bc2 Mon Sep 17 00:00:00 2001 From: Yifan Mai Date: Wed, 5 Feb 2025 10:55:26 -0800 Subject: [PATCH] Add o3-mini model (#3304) --- setup.cfg | 2 +- src/helm/clients/openai_client.py | 22 +++++++----- src/helm/config/model_deployments.yaml | 47 ++++++++++++++++++++++++++ src/helm/config/model_metadata.yaml | 40 ++++++++++++++++++++++ 4 files changed, 102 insertions(+), 9 deletions(-) diff --git a/setup.cfg b/setup.cfg index e36da4a1d00..3f9de4da968 100644 --- a/setup.cfg +++ b/setup.cfg @@ -146,7 +146,7 @@ mistral = mistralai~=1.1 openai = - openai~=1.52 + openai~=1.61 tiktoken~=0.7 pydantic~=2.0 # For model_dump(mode="json") - openai only requires pydantic>=1.9.0 diff --git a/src/helm/clients/openai_client.py b/src/helm/clients/openai_client.py index f1b2d4e799f..077d2d4b128 100644 --- a/src/helm/clients/openai_client.py +++ b/src/helm/clients/openai_client.py @@ -44,14 +44,18 @@ def __init__( api_key: Optional[str] = None, org_id: Optional[str] = None, base_url: Optional[str] = None, + reasoning_effort: Optional[str] = None, + openai_model_name: Optional[str] = None, ): super().__init__(cache_config=cache_config) self.tokenizer = tokenizer self.tokenizer_name = tokenizer_name self.client = OpenAI(api_key=api_key, organization=org_id, base_url=base_url) + self.reasoning_effort = reasoning_effort + self.openai_model_name = openai_model_name def _get_model_for_request(self, request: Request) -> str: - return request.model_engine + return self.openai_model_name or request.model_engine def _get_cache_key(self, raw_request: Dict, request: Request): cache_key = CachingClient.make_cache_key(raw_request, request) @@ -175,7 +179,7 @@ def _make_chat_request(self, request: Request) -> RequestResult: # Special handling for o1 models. 
# Refer to the "Reasoning models" documentation for further discussion of o1 model limitations: # https://platform.openai.com/docs/guides/reasoning - if request.model_engine.startswith("o1"): + if request.model_engine.startswith("o1") or request.model_engine.startswith("o3"): # Avoid error: # "Unsupported parameter: 'max_tokens' is not supported with this model. Use 'max_completion_tokens' instead." # noqa: E501 # Note that openai>=1.45 is needed for this @@ -187,12 +191,14 @@ def _make_chat_request(self, request: Request) -> RequestResult: if raw_request["stop"] is None: raw_request.pop("stop") - if request.model_engine == "o1-2024-12-17": - # Avoid error: - # "Error code: 400 - {'error': {'message': "Unsupported parameter: 'temperature' is - # not supported with this model.", 'type': 'invalid_request_error', 'param': 'temperature', - # 'code': 'unsupported_parameter'}}" - raw_request.pop("temperature", None) + # Avoid error: + # "Error code: 400 - {'error': {'message': "Unsupported parameter: 'temperature' is + # not supported with this model.", 'type': 'invalid_request_error', 'param': 'temperature', + # 'code': 'unsupported_parameter'}}" + raw_request.pop("temperature", None) + + if self.reasoning_effort: + raw_request["reasoning_effort"] = self.reasoning_effort elif is_vlm(request.model): # Avoid error: # "Invalid type for 'stop': expected an unsupported value, but got null instead." 
diff --git a/src/helm/config/model_deployments.yaml b/src/helm/config/model_deployments.yaml index c57352d5798..12a158fb28b 100644 --- a/src/helm/config/model_deployments.yaml +++ b/src/helm/config/model_deployments.yaml @@ -1965,6 +1965,26 @@ model_deployments: client_spec: class_name: "helm.clients.openai_client.OpenAIClient" + - name: openai/o1-2024-12-17-low-reasoning-effort + model_name: openai/o1-2024-12-17-low-reasoning-effort + tokenizer_name: openai/cl100k_base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + args: + openai_model_name: o1-2024-12-17 + reasoning_effort: low + + - name: openai/o1-2024-12-17-high-reasoning-effort + model_name: openai/o1-2024-12-17-high-reasoning-effort + tokenizer_name: openai/cl100k_base + max_sequence_length: 128000 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + args: + openai_model_name: o1-2024-12-17 + reasoning_effort: high + - name: openai/o1-preview-2024-09-12 model_name: openai/o1-preview-2024-09-12 tokenizer_name: openai/cl100k_base @@ -1979,6 +1999,33 @@ model_deployments: client_spec: class_name: "helm.clients.openai_client.OpenAIClient" + - name: openai/o3-mini-2025-01-31 + model_name: openai/o3-mini-2025-01-31 + tokenizer_name: openai/cl100k_base + max_sequence_length: 200000 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + + - name: openai/o3-mini-2025-01-31-low-reasoning-effort + model_name: openai/o3-mini-2025-01-31-low-reasoning-effort + tokenizer_name: openai/cl100k_base + max_sequence_length: 200000 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + args: + openai_model_name: o3-mini-2025-01-31 + reasoning_effort: low + + - name: openai/o3-mini-2025-01-31-high-reasoning-effort + model_name: openai/o3-mini-2025-01-31-high-reasoning-effort + tokenizer_name: openai/cl100k_base + max_sequence_length: 200000 + client_spec: + class_name: "helm.clients.openai_client.OpenAIClient" + args: + 
openai_model_name: o3-mini-2025-01-31 + reasoning_effort: high + ## Text Similarity Models # OpenAI similarity embedding models: https://beta.openai.com/docs/guides/embeddings # The number of parameters is guessed based on the number of parameters of the diff --git a/src/helm/config/model_metadata.yaml b/src/helm/config/model_metadata.yaml index 9efe64c637e..a5d9fa0680f 100644 --- a/src/helm/config/model_metadata.yaml +++ b/src/helm/config/model_metadata.yaml @@ -2643,6 +2643,22 @@ models: release_date: 2024-12-17 tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + - name: openai/o1-2024-12-17-low-reasoning-effort + display_name: o1 (2024-12-17, low reasoning effort) + description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/)) The requests' reasoning effort parameter is set to low. + creator_organization_name: OpenAI + access: limited + release_date: 2024-12-17 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: openai/o1-2024-12-17-high-reasoning-effort + display_name: o1 (2024-12-17, high reasoning effort) + description: o1 is a new large language model trained with reinforcement learning to perform complex reasoning. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/)) The requests' reasoning effort parameter is set to high. 
+ creator_organization_name: OpenAI + access: limited + release_date: 2024-12-17 + tags: [TEXT_MODEL_TAG, VISION_LANGUAGE_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + - name: openai/o1-preview-2024-09-12 display_name: o1-preview (2024-09-12) description: o1-preview is a language model trained with reinforcement learning to perform complex reasoning that can produce a long internal chain of thought before responding to the user. ([model card](https://openai.com/index/openai-o1-system-card/), [blog post](https://openai.com/index/learning-to-reason-with-llms/)) @@ -2659,6 +2675,30 @@ models: release_date: 2024-09-12 tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + - name: openai/o3-mini-2025-01-31 + display_name: o3-mini (2025-01-31) + description: o3-mini is a small reasoning model from OpenAI that aims to deliver STEM capabilities while maintaining the low cost and reduced latency of OpenAI o1-mini. ([blog post](https://openai.com/index/openai-o3-mini/)) + creator_organization_name: OpenAI + access: limited + release_date: 2025-01-31 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: openai/o3-mini-2025-01-31-low-reasoning-effort + display_name: o3-mini (2025-01-31, low reasoning effort) + description: o3-mini is a small reasoning model from OpenAI that aims to deliver STEM capabilities while maintaining the low cost and reduced latency of OpenAI o1-mini. ([blog post](https://openai.com/index/openai-o3-mini/)) The requests' reasoning effort parameter is set to low. 
+ creator_organization_name: OpenAI + access: limited + release_date: 2025-01-31 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + + - name: openai/o3-mini-2025-01-31-high-reasoning-effort + display_name: o3-mini (2025-01-31, high reasoning effort) + description: o3-mini is a small reasoning model from OpenAI that aims to deliver STEM capabilities while maintaining the low cost and reduced latency of OpenAI o1-mini. ([blog post](https://openai.com/index/openai-o3-mini/)) The requests' reasoning effort parameter is set to high. + creator_organization_name: OpenAI + access: limited + release_date: 2025-01-31 + tags: [TEXT_MODEL_TAG, LIMITED_FUNCTIONALITY_TEXT_MODEL_TAG, INSTRUCTION_FOLLOWING_MODEL_TAG] + ## Codex Models # DEPRECATED: Codex models have been shut down on March 23 2023.