Skip to content

Commit

Permalink
feat(api): update enum values, comments, and examples (#2045)
Browse files Browse the repository at this point in the history
  • Loading branch information
stainless-app[bot] authored Jan 22, 2025
1 parent 7f81d21 commit b9824d2
Show file tree
Hide file tree
Showing 20 changed files with 152 additions and 146 deletions.
2 changes: 1 addition & 1 deletion .stats.yml
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
configured_endpoints: 69
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-b5b0e2c794b012919701c3fd43286af10fa25d33ceb8a881bec2636028f446e0.yml
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-3904ef6b29a89c98f93a9b7da19879695f3c440564be6384db7af1b734611ede.yml
16 changes: 8 additions & 8 deletions src/openai/resources/audio/speech.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ def create(
*,
input: str,
model: Union[str, SpeechModel],
voice: Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
voice: Literal["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"],
response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] | NotGiven = NOT_GIVEN,
speed: float | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
Expand All @@ -73,9 +73,9 @@ def create(
One of the available [TTS models](https://platform.openai.com/docs/models#tts):
`tts-1` or `tts-1-hd`
voice: The voice to use when generating the audio. Supported voices are `alloy`,
`echo`, `fable`, `onyx`, `nova`, and `shimmer`. Previews of the voices are
available in the
voice: The voice to use when generating the audio. Supported voices are `alloy`, `ash`,
`coral`, `echo`, `fable`, `onyx`, `nova`, `sage` and `shimmer`. Previews of the
voices are available in the
[Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech#voice-options).
response_format: The format to return audio in. Supported formats are `mp3`, `opus`, `aac`, `flac`,
Expand Down Expand Up @@ -137,7 +137,7 @@ async def create(
*,
input: str,
model: Union[str, SpeechModel],
voice: Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
voice: Literal["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"],
response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] | NotGiven = NOT_GIVEN,
speed: float | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
Expand All @@ -157,9 +157,9 @@ async def create(
One of the available [TTS models](https://platform.openai.com/docs/models#tts):
`tts-1` or `tts-1-hd`
voice: The voice to use when generating the audio. Supported voices are `alloy`,
`echo`, `fable`, `onyx`, `nova`, and `shimmer`. Previews of the voices are
available in the
voice: The voice to use when generating the audio. Supported voices are `alloy`, `ash`,
`coral`, `echo`, `fable`, `onyx`, `nova`, `sage` and `shimmer`. Previews of the
voices are available in the
[Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech#voice-options).
response_format: The format to return audio in. Supported formats are `mp3`, `opus`, `aac`, `flac`,
Expand Down
48 changes: 28 additions & 20 deletions src/openai/resources/beta/realtime/sessions.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,18 +46,19 @@ def with_streaming_response(self) -> SessionsWithStreamingResponse:
def create(
self,
*,
input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
input_audio_transcription: session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN,
instructions: str | NotGiven = NOT_GIVEN,
max_response_output_tokens: Union[int, Literal["inf"]] | NotGiven = NOT_GIVEN,
modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN,
model: Literal[
"gpt-4o-realtime-preview",
"gpt-4o-realtime-preview-2024-10-01",
"gpt-4o-realtime-preview-2024-12-17",
"gpt-4o-mini-realtime-preview",
"gpt-4o-mini-realtime-preview-2024-12-17",
],
input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
input_audio_transcription: session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN,
instructions: str | NotGiven = NOT_GIVEN,
max_response_output_tokens: Union[int, Literal["inf"]] | NotGiven = NOT_GIVEN,
modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN,
]
| NotGiven = NOT_GIVEN,
output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
temperature: float | NotGiven = NOT_GIVEN,
tool_choice: str | NotGiven = NOT_GIVEN,
Expand All @@ -81,9 +82,9 @@ def create(
the Realtime API.
Args:
model: The Realtime model used for this session.
input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
`pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
(mono), and little-endian byte order.
input_audio_transcription: Configuration for input audio transcription, defaults to off and can be set to
`null` to turn off once on. Input audio transcription is not native to the
Expand All @@ -110,7 +111,10 @@ def create(
modalities: The set of modalities the model can respond with. To disable audio, set this to
["text"].
model: The Realtime model used for this session.
output_audio_format: The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
For `pcm16`, output audio is sampled at a rate of 24kHz.
temperature: Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.
Expand Down Expand Up @@ -140,12 +144,12 @@ def create(
"/realtime/sessions",
body=maybe_transform(
{
"model": model,
"input_audio_format": input_audio_format,
"input_audio_transcription": input_audio_transcription,
"instructions": instructions,
"max_response_output_tokens": max_response_output_tokens,
"modalities": modalities,
"model": model,
"output_audio_format": output_audio_format,
"temperature": temperature,
"tool_choice": tool_choice,
Expand Down Expand Up @@ -185,18 +189,19 @@ def with_streaming_response(self) -> AsyncSessionsWithStreamingResponse:
async def create(
self,
*,
input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
input_audio_transcription: session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN,
instructions: str | NotGiven = NOT_GIVEN,
max_response_output_tokens: Union[int, Literal["inf"]] | NotGiven = NOT_GIVEN,
modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN,
model: Literal[
"gpt-4o-realtime-preview",
"gpt-4o-realtime-preview-2024-10-01",
"gpt-4o-realtime-preview-2024-12-17",
"gpt-4o-mini-realtime-preview",
"gpt-4o-mini-realtime-preview-2024-12-17",
],
input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
input_audio_transcription: session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN,
instructions: str | NotGiven = NOT_GIVEN,
max_response_output_tokens: Union[int, Literal["inf"]] | NotGiven = NOT_GIVEN,
modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN,
]
| NotGiven = NOT_GIVEN,
output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
temperature: float | NotGiven = NOT_GIVEN,
tool_choice: str | NotGiven = NOT_GIVEN,
Expand All @@ -220,9 +225,9 @@ async def create(
the Realtime API.
Args:
model: The Realtime model used for this session.
input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
`pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
(mono), and little-endian byte order.
input_audio_transcription: Configuration for input audio transcription, defaults to off and can be set to
`null` to turn off once on. Input audio transcription is not native to the
Expand All @@ -249,7 +254,10 @@ async def create(
modalities: The set of modalities the model can respond with. To disable audio, set this to
["text"].
model: The Realtime model used for this session.
output_audio_format: The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
For `pcm16`, output audio is sampled at a rate of 24kHz.
temperature: Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.
Expand Down Expand Up @@ -279,12 +287,12 @@ async def create(
"/realtime/sessions",
body=await async_maybe_transform(
{
"model": model,
"input_audio_format": input_audio_format,
"input_audio_transcription": input_audio_transcription,
"instructions": instructions,
"max_response_output_tokens": max_response_output_tokens,
"modalities": modalities,
"model": model,
"output_audio_format": output_audio_format,
"temperature": temperature,
"tool_choice": tool_choice,
Expand Down
18 changes: 0 additions & 18 deletions src/openai/resources/chat/completions.py
Original file line number Diff line number Diff line change
Expand Up @@ -249,9 +249,6 @@ def create(
tier with a lower uptime SLA and no latency guarantee.
- When not set, the default behavior is 'auto'.
When this parameter is set, the response body will include the `service_tier`
utilized.
stop: Up to 4 sequences where the API will stop generating further tokens.
store: Whether or not to store the output of this chat completion request for use in
Expand Down Expand Up @@ -507,9 +504,6 @@ def create(
tier with a lower uptime SLA and no latency guarantee.
- When not set, the default behavior is 'auto'.
When this parameter is set, the response body will include the `service_tier`
utilized.
stop: Up to 4 sequences where the API will stop generating further tokens.
store: Whether or not to store the output of this chat completion request for use in
Expand Down Expand Up @@ -758,9 +752,6 @@ def create(
tier with a lower uptime SLA and no latency guarantee.
- When not set, the default behavior is 'auto'.
When this parameter is set, the response body will include the `service_tier`
utilized.
stop: Up to 4 sequences where the API will stop generating further tokens.
store: Whether or not to store the output of this chat completion request for use in
Expand Down Expand Up @@ -1109,9 +1100,6 @@ async def create(
tier with a lower uptime SLA and no latency guarantee.
- When not set, the default behavior is 'auto'.
When this parameter is set, the response body will include the `service_tier`
utilized.
stop: Up to 4 sequences where the API will stop generating further tokens.
store: Whether or not to store the output of this chat completion request for use in
Expand Down Expand Up @@ -1367,9 +1355,6 @@ async def create(
tier with a lower uptime SLA and no latency guarantee.
- When not set, the default behavior is 'auto'.
When this parameter is set, the response body will include the `service_tier`
utilized.
stop: Up to 4 sequences where the API will stop generating further tokens.
store: Whether or not to store the output of this chat completion request for use in
Expand Down Expand Up @@ -1618,9 +1603,6 @@ async def create(
tier with a lower uptime SLA and no latency guarantee.
- When not set, the default behavior is 'auto'.
When this parameter is set, the response body will include the `service_tier`
utilized.
stop: Up to 4 sequences where the API will stop generating further tokens.
store: Whether or not to store the output of this chat completion request for use in
Expand Down
6 changes: 4 additions & 2 deletions src/openai/resources/embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ def create(
`text-embedding-ada-002`), cannot be an empty string, and any array must be 2048
dimensions or less.
[Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
for counting tokens.
for counting tokens. Some models may also impose a limit on total number of
tokens summed across inputs.
model: ID of the model to use. You can use the
[List models](https://platform.openai.com/docs/api-reference/models/list) API to
Expand Down Expand Up @@ -159,7 +160,8 @@ async def create(
`text-embedding-ada-002`), cannot be an empty string, and any array must be 2048
dimensions or less.
[Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
for counting tokens.
for counting tokens. Some models may also impose a limit on total number of
tokens summed across inputs.
model: ID of the model to use. You can use the
[List models](https://platform.openai.com/docs/api-reference/models/list) API to
Expand Down
6 changes: 3 additions & 3 deletions src/openai/types/audio/speech_create_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,11 @@ class SpeechCreateParams(TypedDict, total=False):
`tts-1` or `tts-1-hd`
"""

voice: Required[Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"]]
voice: Required[Literal["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"]]
"""The voice to use when generating the audio.
Supported voices are `alloy`, `echo`, `fable`, `onyx`, `nova`, and `shimmer`.
Previews of the voices are available in the
Supported voices are `alloy`, `ash`, `coral`, `echo`, `fable`, `onyx`, `nova`,
`sage` and `shimmer`. Previews of the voices are available in the
[Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech#voice-options).
"""

Expand Down
11 changes: 6 additions & 5 deletions src/openai/types/beta/realtime/conversation_item_create_event.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@ class ConversationItemCreateEvent(BaseModel):
"""Optional client-generated ID used to identify this event."""

previous_item_id: Optional[str] = None
"""The ID of the preceding item after which the new item will be inserted.
If not set, the new item will be appended to the end of the conversation. If
set, it allows an item to be inserted mid-conversation. If the ID cannot be
found, an error will be returned and the item will not be added.
"""
The ID of the preceding item after which the new item will be inserted. If not
set, the new item will be appended to the end of the conversation. If set to
`root`, the new item will be added to the beginning of the conversation. If set
to an existing ID, it allows an item to be inserted mid-conversation. If the ID
cannot be found, an error will be returned and the item will not be added.
"""
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@ class ConversationItemCreateEventParam(TypedDict, total=False):
"""Optional client-generated ID used to identify this event."""

previous_item_id: str
"""The ID of the preceding item after which the new item will be inserted.
If not set, the new item will be appended to the end of the conversation. If
set, it allows an item to be inserted mid-conversation. If the ID cannot be
found, an error will be returned and the item will not be added.
"""
The ID of the preceding item after which the new item will be inserted. If not
set, the new item will be appended to the end of the conversation. If set to
`root`, the new item will be added to the beginning of the conversation. If set
to an existing ID, it allows an item to be inserted mid-conversation. If the ID
cannot be found, an error will be returned and the item will not be added.
"""
13 changes: 11 additions & 2 deletions src/openai/types/beta/realtime/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,12 @@ class Session(BaseModel):
"""Unique identifier for the session object."""

input_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None
"""The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
"""The format of input audio.
Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, input audio must
be 16-bit PCM at a 24kHz sample rate, single channel (mono), and little-endian
byte order.
"""

input_audio_transcription: Optional[InputAudioTranscription] = None
"""
Expand Down Expand Up @@ -117,7 +122,11 @@ class Session(BaseModel):
"""The Realtime model used for this session."""

output_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None
"""The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
"""The format of output audio.
Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, output audio is
sampled at a rate of 24kHz.
"""

temperature: Optional[float] = None
"""Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8."""
Expand Down
35 changes: 21 additions & 14 deletions src/openai/types/beta/realtime/session_create_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,19 @@
from __future__ import annotations

from typing import List, Union, Iterable
from typing_extensions import Literal, Required, TypedDict
from typing_extensions import Literal, TypedDict

__all__ = ["SessionCreateParams", "InputAudioTranscription", "Tool", "TurnDetection"]


class SessionCreateParams(TypedDict, total=False):
model: Required[
Literal[
"gpt-4o-realtime-preview",
"gpt-4o-realtime-preview-2024-10-01",
"gpt-4o-realtime-preview-2024-12-17",
"gpt-4o-mini-realtime-preview",
"gpt-4o-mini-realtime-preview-2024-12-17",
]
]
"""The Realtime model used for this session."""

input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"]
"""The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
"""The format of input audio.
Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, input audio must
be 16-bit PCM at a 24kHz sample rate, single channel (mono), and little-endian
byte order.
"""

input_audio_transcription: InputAudioTranscription
"""
Expand Down Expand Up @@ -61,8 +55,21 @@ class SessionCreateParams(TypedDict, total=False):
To disable audio, set this to ["text"].
"""

model: Literal[
"gpt-4o-realtime-preview",
"gpt-4o-realtime-preview-2024-10-01",
"gpt-4o-realtime-preview-2024-12-17",
"gpt-4o-mini-realtime-preview",
"gpt-4o-mini-realtime-preview-2024-12-17",
]
"""The Realtime model used for this session."""

output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"]
"""The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
"""The format of output audio.
Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, output audio is
sampled at a rate of 24kHz.
"""

temperature: float
"""Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8."""
Expand Down
Loading

0 comments on commit b9824d2

Please sign in to comment.