diff --git a/.stats.yml b/.stats.yml
index 9600edae3b..d518bac586 100644
--- a/.stats.yml
+++ b/.stats.yml
@@ -1,2 +1,2 @@
 configured_endpoints: 69
-openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-b5b0e2c794b012919701c3fd43286af10fa25d33ceb8a881bec2636028f446e0.yml
+openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai-3904ef6b29a89c98f93a9b7da19879695f3c440564be6384db7af1b734611ede.yml
diff --git a/src/openai/resources/audio/speech.py b/src/openai/resources/audio/speech.py
index 805a8c19c9..ad01118161 100644
--- a/src/openai/resources/audio/speech.py
+++ b/src/openai/resources/audio/speech.py
@@ -53,7 +53,7 @@ def create(
         *,
         input: str,
         model: Union[str, SpeechModel],
-        voice: Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
+        voice: Literal["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"],
         response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] | NotGiven = NOT_GIVEN,
         speed: float | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -73,9 +73,9 @@ def create(
               One of the available [TTS models](https://platform.openai.com/docs/models#tts):
               `tts-1` or `tts-1-hd`
 
-          voice: The voice to use when generating the audio. Supported voices are `alloy`,
-              `echo`, `fable`, `onyx`, `nova`, and `shimmer`. Previews of the voices are
-              available in the
+          voice: The voice to use when generating the audio. Supported voices are `alloy`, `ash`,
+              `coral`, `echo`, `fable`, `onyx`, `nova`, `sage` and `shimmer`. Previews of the
+              voices are available in the
               [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech#voice-options).
 
           response_format: The format to audio in. Supported formats are `mp3`, `opus`, `aac`, `flac`,
@@ -137,7 +137,7 @@ async def create(
         *,
         input: str,
         model: Union[str, SpeechModel],
-        voice: Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
+        voice: Literal["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"],
         response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] | NotGiven = NOT_GIVEN,
         speed: float | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -157,9 +157,9 @@ async def create(
               One of the available [TTS models](https://platform.openai.com/docs/models#tts):
               `tts-1` or `tts-1-hd`
 
-          voice: The voice to use when generating the audio. Supported voices are `alloy`,
-              `echo`, `fable`, `onyx`, `nova`, and `shimmer`. Previews of the voices are
-              available in the
+          voice: The voice to use when generating the audio. Supported voices are `alloy`, `ash`,
+              `coral`, `echo`, `fable`, `onyx`, `nova`, `sage` and `shimmer`. Previews of the
+              voices are available in the
               [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech#voice-options).
 
           response_format: The format to audio in. Supported formats are `mp3`, `opus`, `aac`, `flac`,
diff --git a/src/openai/resources/beta/realtime/sessions.py b/src/openai/resources/beta/realtime/sessions.py
index 8d2df30753..b920c89207 100644
--- a/src/openai/resources/beta/realtime/sessions.py
+++ b/src/openai/resources/beta/realtime/sessions.py
@@ -46,18 +46,19 @@ def with_streaming_response(self) -> SessionsWithStreamingResponse:
     def create(
         self,
         *,
+        input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
+        input_audio_transcription: session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN,
+        instructions: str | NotGiven = NOT_GIVEN,
+        max_response_output_tokens: Union[int, Literal["inf"]] | NotGiven = NOT_GIVEN,
+        modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN,
         model: Literal[
             "gpt-4o-realtime-preview",
             "gpt-4o-realtime-preview-2024-10-01",
             "gpt-4o-realtime-preview-2024-12-17",
             "gpt-4o-mini-realtime-preview",
             "gpt-4o-mini-realtime-preview-2024-12-17",
-        ],
-        input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
-        input_audio_transcription: session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN,
-        instructions: str | NotGiven = NOT_GIVEN,
-        max_response_output_tokens: Union[int, Literal["inf"]] | NotGiven = NOT_GIVEN,
-        modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN,
+        ]
+        | NotGiven = NOT_GIVEN,
         output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
         temperature: float | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
@@ -81,9 +82,9 @@ def create(
         the Realtime API.
 
         Args:
-          model: The Realtime model used for this session.
-
-          input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
+          input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
+              `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
+              (mono), and little-endian byte order.
 
           input_audio_transcription: Configuration for input audio transcription, defaults to off and can be set to
               `null` to turn off once on. Input audio transcription is not native to the
@@ -110,7 +111,10 @@ def create(
           modalities: The set of modalities the model can respond with. To disable audio, set this to
               ["text"].
 
+          model: The Realtime model used for this session.
+
           output_audio_format: The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
+              For `pcm16`, output audio is sampled at a rate of 24kHz.
 
           temperature: Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.
 
@@ -140,12 +144,12 @@ def create(
             "/realtime/sessions",
             body=maybe_transform(
                 {
-                    "model": model,
                     "input_audio_format": input_audio_format,
                     "input_audio_transcription": input_audio_transcription,
                     "instructions": instructions,
                     "max_response_output_tokens": max_response_output_tokens,
                     "modalities": modalities,
+                    "model": model,
                     "output_audio_format": output_audio_format,
                     "temperature": temperature,
                     "tool_choice": tool_choice,
@@ -185,18 +189,19 @@ def with_streaming_response(self) -> AsyncSessionsWithStreamingResponse:
     async def create(
         self,
         *,
+        input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
+        input_audio_transcription: session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN,
+        instructions: str | NotGiven = NOT_GIVEN,
+        max_response_output_tokens: Union[int, Literal["inf"]] | NotGiven = NOT_GIVEN,
+        modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN,
         model: Literal[
             "gpt-4o-realtime-preview",
             "gpt-4o-realtime-preview-2024-10-01",
             "gpt-4o-realtime-preview-2024-12-17",
             "gpt-4o-mini-realtime-preview",
             "gpt-4o-mini-realtime-preview-2024-12-17",
-        ],
-        input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
-        input_audio_transcription: session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN,
-        instructions: str | NotGiven = NOT_GIVEN,
-        max_response_output_tokens: Union[int, Literal["inf"]] | NotGiven = NOT_GIVEN,
-        modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN,
+        ]
+        | NotGiven = NOT_GIVEN,
         output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
         temperature: float | NotGiven = NOT_GIVEN,
         tool_choice: str | NotGiven = NOT_GIVEN,
@@ -220,9 +225,9 @@ async def create(
         the Realtime API.
 
         Args:
-          model: The Realtime model used for this session.
-
-          input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
+          input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
+              `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
+              (mono), and little-endian byte order.
 
           input_audio_transcription: Configuration for input audio transcription, defaults to off and can be set to
               `null` to turn off once on. Input audio transcription is not native to the
@@ -249,7 +254,10 @@ async def create(
           modalities: The set of modalities the model can respond with. To disable audio, set this to
               ["text"].
 
+          model: The Realtime model used for this session.
+
           output_audio_format: The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
+              For `pcm16`, output audio is sampled at a rate of 24kHz.
 
           temperature: Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8.
 
@@ -279,12 +287,12 @@ async def create(
             "/realtime/sessions",
             body=await async_maybe_transform(
                 {
-                    "model": model,
                     "input_audio_format": input_audio_format,
                     "input_audio_transcription": input_audio_transcription,
                     "instructions": instructions,
                     "max_response_output_tokens": max_response_output_tokens,
                     "modalities": modalities,
+                    "model": model,
                     "output_audio_format": output_audio_format,
                     "temperature": temperature,
                     "tool_choice": tool_choice,
diff --git a/src/openai/resources/chat/completions.py b/src/openai/resources/chat/completions.py
index af76caf401..c44b9d0c30 100644
--- a/src/openai/resources/chat/completions.py
+++ b/src/openai/resources/chat/completions.py
@@ -249,9 +249,6 @@ def create(
                 tier with a lower uptime SLA and no latency guarentee.
               - When not set, the default behavior is 'auto'.
 
-              When this parameter is set, the response body will include the `service_tier`
-              utilized.
-
           stop: Up to 4 sequences where the API will stop generating further tokens.
 
           store: Whether or not to store the output of this chat completion request for use in
@@ -507,9 +504,6 @@ def create(
                 tier with a lower uptime SLA and no latency guarentee.
               - When not set, the default behavior is 'auto'.
 
-              When this parameter is set, the response body will include the `service_tier`
-              utilized.
-
           stop: Up to 4 sequences where the API will stop generating further tokens.
 
           store: Whether or not to store the output of this chat completion request for use in
@@ -758,9 +752,6 @@ def create(
                 tier with a lower uptime SLA and no latency guarentee.
               - When not set, the default behavior is 'auto'.
 
-              When this parameter is set, the response body will include the `service_tier`
-              utilized.
-
           stop: Up to 4 sequences where the API will stop generating further tokens.
 
           store: Whether or not to store the output of this chat completion request for use in
@@ -1109,9 +1100,6 @@ async def create(
                 tier with a lower uptime SLA and no latency guarentee.
               - When not set, the default behavior is 'auto'.
 
-              When this parameter is set, the response body will include the `service_tier`
-              utilized.
-
           stop: Up to 4 sequences where the API will stop generating further tokens.
 
           store: Whether or not to store the output of this chat completion request for use in
@@ -1367,9 +1355,6 @@ async def create(
                 tier with a lower uptime SLA and no latency guarentee.
               - When not set, the default behavior is 'auto'.
 
-              When this parameter is set, the response body will include the `service_tier`
-              utilized.
-
           stop: Up to 4 sequences where the API will stop generating further tokens.
 
           store: Whether or not to store the output of this chat completion request for use in
@@ -1618,9 +1603,6 @@ async def create(
                 tier with a lower uptime SLA and no latency guarentee.
               - When not set, the default behavior is 'auto'.
 
-              When this parameter is set, the response body will include the `service_tier`
-              utilized.
-
           stop: Up to 4 sequences where the API will stop generating further tokens.
 
           store: Whether or not to store the output of this chat completion request for use in
diff --git a/src/openai/resources/embeddings.py b/src/openai/resources/embeddings.py
index 58efdcefa8..e6c09f1374 100644
--- a/src/openai/resources/embeddings.py
+++ b/src/openai/resources/embeddings.py
@@ -69,7 +69,8 @@ def create(
               `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048
               dimensions or less.
               [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
-              for counting tokens.
+              for counting tokens. Some models may also impose a limit on the total number
+              of tokens summed across inputs.
 
           model: ID of the model to use. You can use the
               [List models](https://platform.openai.com/docs/api-reference/models/list) API to
@@ -159,7 +160,8 @@ async def create(
               `text-embedding-ada-002`), cannot be an empty string, and any array must be 2048
               dimensions or less.
               [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
-              for counting tokens.
+              for counting tokens. Some models may also impose a limit on the total number
+              of tokens summed across inputs.
 
           model: ID of the model to use. You can use the
               [List models](https://platform.openai.com/docs/api-reference/models/list) API to
diff --git a/src/openai/types/audio/speech_create_params.py b/src/openai/types/audio/speech_create_params.py
index a60d000708..ed1a1ce748 100644
--- a/src/openai/types/audio/speech_create_params.py
+++ b/src/openai/types/audio/speech_create_params.py
@@ -20,11 +20,11 @@ class SpeechCreateParams(TypedDict, total=False):
     `tts-1` or `tts-1-hd`
     """
 
-    voice: Required[Literal["alloy", "echo", "fable", "onyx", "nova", "shimmer"]]
+    voice: Required[Literal["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"]]
     """The voice to use when generating the audio.
 
-    Supported voices are `alloy`, `echo`, `fable`, `onyx`, `nova`, and `shimmer`.
-    Previews of the voices are available in the
+    Supported voices are `alloy`, `ash`, `coral`, `echo`, `fable`, `onyx`, `nova`,
+    `sage` and `shimmer`. Previews of the voices are available in the
     [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech#voice-options).
     """
 
diff --git a/src/openai/types/beta/realtime/conversation_item_create_event.py b/src/openai/types/beta/realtime/conversation_item_create_event.py
index 50d309675b..c4f72b9aff 100644
--- a/src/openai/types/beta/realtime/conversation_item_create_event.py
+++ b/src/openai/types/beta/realtime/conversation_item_create_event.py
@@ -20,9 +20,10 @@ class ConversationItemCreateEvent(BaseModel):
     """Optional client-generated ID used to identify this event."""
 
     previous_item_id: Optional[str] = None
-    """The ID of the preceding item after which the new item will be inserted.
-
-    If not set, the new item will be appended to the end of the conversation. If
-    set, it allows an item to be inserted mid-conversation. If the ID cannot be
-    found, an error will be returned and the item will not be added.
+    """
+    The ID of the preceding item after which the new item will be inserted. If not
+    set, the new item will be appended to the end of the conversation. If set to
+    `root`, the new item will be added to the beginning of the conversation. If set
+    to an existing ID, it allows an item to be inserted mid-conversation. If the ID
+    cannot be found, an error will be returned and the item will not be added.
     """
diff --git a/src/openai/types/beta/realtime/conversation_item_create_event_param.py b/src/openai/types/beta/realtime/conversation_item_create_event_param.py
index b8c8bbc251..6da5a63a9d 100644
--- a/src/openai/types/beta/realtime/conversation_item_create_event_param.py
+++ b/src/openai/types/beta/realtime/conversation_item_create_event_param.py
@@ -20,9 +20,10 @@ class ConversationItemCreateEventParam(TypedDict, total=False):
     """Optional client-generated ID used to identify this event."""
 
     previous_item_id: str
-    """The ID of the preceding item after which the new item will be inserted.
-
-    If not set, the new item will be appended to the end of the conversation. If
-    set, it allows an item to be inserted mid-conversation. If the ID cannot be
-    found, an error will be returned and the item will not be added.
+    """
+    The ID of the preceding item after which the new item will be inserted. If not
+    set, the new item will be appended to the end of the conversation. If set to
+    `root`, the new item will be added to the beginning of the conversation. If set
+    to an existing ID, it allows an item to be inserted mid-conversation. If the ID
+    cannot be found, an error will be returned and the item will not be added.
     """
diff --git a/src/openai/types/beta/realtime/session.py b/src/openai/types/beta/realtime/session.py
index 09cdbb02bc..2d028f817c 100644
--- a/src/openai/types/beta/realtime/session.py
+++ b/src/openai/types/beta/realtime/session.py
@@ -63,7 +63,12 @@ class Session(BaseModel):
     """Unique identifier for the session object."""
 
     input_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None
-    """The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+    """The format of input audio.
+
+    Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, input audio must
+    be 16-bit PCM at a 24kHz sample rate, single channel (mono), and little-endian
+    byte order.
+    """
 
     input_audio_transcription: Optional[InputAudioTranscription] = None
     """
@@ -117,7 +122,11 @@ class Session(BaseModel):
     """The Realtime model used for this session."""
 
     output_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None
-    """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+    """The format of output audio.
+
+    Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, output audio is
+    sampled at a rate of 24kHz.
+    """
 
     temperature: Optional[float] = None
     """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8."""
diff --git a/src/openai/types/beta/realtime/session_create_params.py b/src/openai/types/beta/realtime/session_create_params.py
index f56f2c5c22..3708efeecd 100644
--- a/src/openai/types/beta/realtime/session_create_params.py
+++ b/src/openai/types/beta/realtime/session_create_params.py
@@ -3,25 +3,19 @@
 from __future__ import annotations
 
 from typing import List, Union, Iterable
-from typing_extensions import Literal, Required, TypedDict
+from typing_extensions import Literal, TypedDict
 
 __all__ = ["SessionCreateParams", "InputAudioTranscription", "Tool", "TurnDetection"]
 
 
 class SessionCreateParams(TypedDict, total=False):
-    model: Required[
-        Literal[
-            "gpt-4o-realtime-preview",
-            "gpt-4o-realtime-preview-2024-10-01",
-            "gpt-4o-realtime-preview-2024-12-17",
-            "gpt-4o-mini-realtime-preview",
-            "gpt-4o-mini-realtime-preview-2024-12-17",
-        ]
-    ]
-    """The Realtime model used for this session."""
-
     input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"]
-    """The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+    """The format of input audio.
+
+    Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, input audio must
+    be 16-bit PCM at a 24kHz sample rate, single channel (mono), and little-endian
+    byte order.
+    """
 
     input_audio_transcription: InputAudioTranscription
     """
@@ -61,8 +55,21 @@ class SessionCreateParams(TypedDict, total=False):
     To disable audio, set this to ["text"].
     """
 
+    model: Literal[
+        "gpt-4o-realtime-preview",
+        "gpt-4o-realtime-preview-2024-10-01",
+        "gpt-4o-realtime-preview-2024-12-17",
+        "gpt-4o-mini-realtime-preview",
+        "gpt-4o-mini-realtime-preview-2024-12-17",
+    ]
+    """The Realtime model used for this session."""
+
     output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"]
-    """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+    """The format of output audio.
+
+    Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, output audio is
+    sampled at a rate of 24kHz.
+    """
 
     temperature: float
     """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8."""
diff --git a/src/openai/types/beta/realtime/session_update_event.py b/src/openai/types/beta/realtime/session_update_event.py
index c04220aa25..322e588a4e 100644
--- a/src/openai/types/beta/realtime/session_update_event.py
+++ b/src/openai/types/beta/realtime/session_update_event.py
@@ -65,17 +65,13 @@ class SessionTurnDetection(BaseModel):
 
 
 class Session(BaseModel):
-    model: Literal[
-        "gpt-4o-realtime-preview",
-        "gpt-4o-realtime-preview-2024-10-01",
-        "gpt-4o-realtime-preview-2024-12-17",
-        "gpt-4o-mini-realtime-preview",
-        "gpt-4o-mini-realtime-preview-2024-12-17",
-    ]
-    """The Realtime model used for this session."""
-
     input_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None
-    """The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+    """The format of input audio.
+
+    Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, input audio must
+    be 16-bit PCM at a 24kHz sample rate, single channel (mono), and little-endian
+    byte order.
+    """
 
     input_audio_transcription: Optional[SessionInputAudioTranscription] = None
     """
@@ -115,8 +111,23 @@ class Session(BaseModel):
     To disable audio, set this to ["text"].
     """
 
+    model: Optional[
+        Literal[
+            "gpt-4o-realtime-preview",
+            "gpt-4o-realtime-preview-2024-10-01",
+            "gpt-4o-realtime-preview-2024-12-17",
+            "gpt-4o-mini-realtime-preview",
+            "gpt-4o-mini-realtime-preview-2024-12-17",
+        ]
+    ] = None
+    """The Realtime model used for this session."""
+
     output_audio_format: Optional[Literal["pcm16", "g711_ulaw", "g711_alaw"]] = None
-    """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+    """The format of output audio.
+
+    Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, output audio is
+    sampled at a rate of 24kHz.
+    """
 
     temperature: Optional[float] = None
     """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8."""
diff --git a/src/openai/types/beta/realtime/session_update_event_param.py b/src/openai/types/beta/realtime/session_update_event_param.py
index aa06069b04..c01d9b6887 100644
--- a/src/openai/types/beta/realtime/session_update_event_param.py
+++ b/src/openai/types/beta/realtime/session_update_event_param.py
@@ -71,19 +71,13 @@ class SessionTurnDetection(TypedDict, total=False):
 
 
 class Session(TypedDict, total=False):
-    model: Required[
-        Literal[
-            "gpt-4o-realtime-preview",
-            "gpt-4o-realtime-preview-2024-10-01",
-            "gpt-4o-realtime-preview-2024-12-17",
-            "gpt-4o-mini-realtime-preview",
-            "gpt-4o-mini-realtime-preview-2024-12-17",
-        ]
-    ]
-    """The Realtime model used for this session."""
-
     input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"]
-    """The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+    """The format of input audio.
+
+    Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, input audio must
+    be 16-bit PCM at a 24kHz sample rate, single channel (mono), and little-endian
+    byte order.
+    """
 
     input_audio_transcription: SessionInputAudioTranscription
     """
@@ -123,8 +117,21 @@ class Session(TypedDict, total=False):
     To disable audio, set this to ["text"].
     """
 
+    model: Literal[
+        "gpt-4o-realtime-preview",
+        "gpt-4o-realtime-preview-2024-10-01",
+        "gpt-4o-realtime-preview-2024-12-17",
+        "gpt-4o-mini-realtime-preview",
+        "gpt-4o-mini-realtime-preview-2024-12-17",
+    ]
+    """The Realtime model used for this session."""
+
     output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"]
-    """The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`."""
+    """The format of output audio.
+
+    Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For `pcm16`, output audio is
+    sampled at a rate of 24kHz.
+    """
 
     temperature: float
     """Sampling temperature for the model, limited to [0.6, 1.2]. Defaults to 0.8."""
diff --git a/src/openai/types/chat/chat_completion.py b/src/openai/types/chat/chat_completion.py
index 4b53e70890..cb812a2702 100644
--- a/src/openai/types/chat/chat_completion.py
+++ b/src/openai/types/chat/chat_completion.py
@@ -60,11 +60,7 @@ class ChatCompletion(BaseModel):
     """The object type, which is always `chat.completion`."""
 
     service_tier: Optional[Literal["scale", "default"]] = None
-    """The service tier used for processing the request.
-
-    This field is only included if the `service_tier` parameter is specified in the
-    request.
-    """
+    """The service tier used for processing the request."""
 
     system_fingerprint: Optional[str] = None
     """This fingerprint represents the backend configuration that the model runs with.
diff --git a/src/openai/types/chat/chat_completion_assistant_message_param.py b/src/openai/types/chat/chat_completion_assistant_message_param.py
index 35e3a3d784..229fb822f4 100644
--- a/src/openai/types/chat/chat_completion_assistant_message_param.py
+++ b/src/openai/types/chat/chat_completion_assistant_message_param.py
@@ -38,8 +38,8 @@ class ChatCompletionAssistantMessageParam(TypedDict, total=False):
     """The role of the messages author, in this case `assistant`."""
 
     audio: Optional[Audio]
-    """Data about a previous audio response from the model.
-
+    """
+    Data about a previous audio response from the model.
     [Learn more](https://platform.openai.com/docs/guides/audio).
     """
 
diff --git a/src/openai/types/chat/chat_completion_chunk.py b/src/openai/types/chat/chat_completion_chunk.py
index 9ec6dc4bdb..7b0ae2e121 100644
--- a/src/openai/types/chat/chat_completion_chunk.py
+++ b/src/openai/types/chat/chat_completion_chunk.py
@@ -129,11 +129,7 @@ class ChatCompletionChunk(BaseModel):
     """The object type, which is always `chat.completion.chunk`."""
 
     service_tier: Optional[Literal["scale", "default"]] = None
-    """The service tier used for processing the request.
-
-    This field is only included if the `service_tier` parameter is specified in the
-    request.
-    """
+    """The service tier used for processing the request."""
 
     system_fingerprint: Optional[str] = None
     """
diff --git a/src/openai/types/chat/completion_create_params.py b/src/openai/types/chat/completion_create_params.py
index f168ddea6e..30d930b120 100644
--- a/src/openai/types/chat/completion_create_params.py
+++ b/src/openai/types/chat/completion_create_params.py
@@ -220,9 +220,6 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     - If set to 'default', the request will be processed using the default service
       tier with a lower uptime SLA and no latency guarentee.
     - When not set, the default behavior is 'auto'.
-
-    When this parameter is set, the response body will include the `service_tier`
-    utilized.
     """
 
     stop: Union[Optional[str], List[str]]
diff --git a/src/openai/types/embedding_create_params.py b/src/openai/types/embedding_create_params.py
index 1385762885..a90566449b 100644
--- a/src/openai/types/embedding_create_params.py
+++ b/src/openai/types/embedding_create_params.py
@@ -19,7 +19,8 @@ class EmbeddingCreateParams(TypedDict, total=False):
     (8192 tokens for `text-embedding-ada-002`), cannot be an empty string, and any
     array must be 2048 dimensions or less.
     [Example Python code](https://cookbook.openai.com/examples/how_to_count_tokens_with_tiktoken)
-    for counting tokens.
+    for counting tokens. Some models may also impose a limit on the total number
+    of tokens summed across inputs.
     """
 
     model: Required[Union[str, EmbeddingModel]]
diff --git a/tests/api_resources/beta/realtime/test_sessions.py b/tests/api_resources/beta/realtime/test_sessions.py
index 65bfa27572..908aa983be 100644
--- a/tests/api_resources/beta/realtime/test_sessions.py
+++ b/tests/api_resources/beta/realtime/test_sessions.py
@@ -19,20 +19,18 @@ class TestSessions:
 
     @parametrize
     def test_method_create(self, client: OpenAI) -> None:
-        session = client.beta.realtime.sessions.create(
-            model="gpt-4o-realtime-preview",
-        )
+        session = client.beta.realtime.sessions.create()
         assert_matches_type(SessionCreateResponse, session, path=["response"])
 
     @parametrize
     def test_method_create_with_all_params(self, client: OpenAI) -> None:
         session = client.beta.realtime.sessions.create(
-            model="gpt-4o-realtime-preview",
             input_audio_format="pcm16",
             input_audio_transcription={"model": "model"},
             instructions="instructions",
             max_response_output_tokens=0,
             modalities=["text"],
+            model="gpt-4o-realtime-preview",
             output_audio_format="pcm16",
             temperature=0,
             tool_choice="tool_choice",
@@ -57,9 +55,7 @@ def test_method_create_with_all_params(self, client: OpenAI) -> None:
 
     @parametrize
     def test_raw_response_create(self, client: OpenAI) -> None:
-        response = client.beta.realtime.sessions.with_raw_response.create(
-            model="gpt-4o-realtime-preview",
-        )
+        response = client.beta.realtime.sessions.with_raw_response.create()
 
         assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -68,9 +64,7 @@ def test_raw_response_create(self, client: OpenAI) -> None:
 
     @parametrize
     def test_streaming_response_create(self, client: OpenAI) -> None:
-        with client.beta.realtime.sessions.with_streaming_response.create(
-            model="gpt-4o-realtime-preview",
-        ) as response:
+        with client.beta.realtime.sessions.with_streaming_response.create() as response:
             assert not response.is_closed
             assert response.http_request.headers.get("X-Stainless-Lang") == "python"
 
@@ -85,20 +79,18 @@ class TestAsyncSessions:
 
     @parametrize
     async def test_method_create(self, async_client: AsyncOpenAI) -> None:
-        session = await async_client.beta.realtime.sessions.create(
-            model="gpt-4o-realtime-preview",
-        )
+        session = await async_client.beta.realtime.sessions.create()
         assert_matches_type(SessionCreateResponse, session, path=["response"])
 
     @parametrize
     async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) -> None:
         session = await async_client.beta.realtime.sessions.create(
-            model="gpt-4o-realtime-preview",
             input_audio_format="pcm16",
             input_audio_transcription={"model": "model"},
             instructions="instructions",
             max_response_output_tokens=0,
             modalities=["text"],
+            model="gpt-4o-realtime-preview",
             output_audio_format="pcm16",
             temperature=0,
             tool_choice="tool_choice",
@@ -123,9 +115,7 @@ async def test_method_create_with_all_params(self, async_client: AsyncOpenAI) ->
 
     @parametrize
     async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None:
-        response = await async_client.beta.realtime.sessions.with_raw_response.create(
-            model="gpt-4o-realtime-preview",
-        )
+        response = await async_client.beta.realtime.sessions.with_raw_response.create()
 
         assert response.is_closed is True
         assert response.http_request.headers.get("X-Stainless-Lang") == "python"
@@ -134,9 +124,7 @@ async def test_raw_response_create(self, async_client: AsyncOpenAI) -> None:
 
     @parametrize
     async def test_streaming_response_create(self, async_client: AsyncOpenAI) -> None:
-        async with async_client.beta.realtime.sessions.with_streaming_response.create(
-            model="gpt-4o-realtime-preview",
-        ) as response:
+        async with async_client.beta.realtime.sessions.with_streaming_response.create() as response:
             assert not response.is_closed
             assert response.http_request.headers.get("X-Stainless-Lang") == "python"
 
diff --git a/tests/api_resources/chat/test_completions.py b/tests/api_resources/chat/test_completions.py
index 523fcc6ed9..cb899502b4 100644
--- a/tests/api_resources/chat/test_completions.py
+++ b/tests/api_resources/chat/test_completions.py
@@ -71,7 +71,7 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None:
             presence_penalty=-2,
             reasoning_effort="low",
             response_format={"type": "text"},
-            seed=-9007199254740991,
+            seed=0,
             service_tier="auto",
             stop="string",
             store=True,
@@ -186,7 +186,7 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None:
             presence_penalty=-2,
             reasoning_effort="low",
             response_format={"type": "text"},
-            seed=-9007199254740991,
+            seed=0,
             service_tier="auto",
             stop="string",
             store=True,
@@ -303,7 +303,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
             presence_penalty=-2,
             reasoning_effort="low",
             response_format={"type": "text"},
-            seed=-9007199254740991,
+            seed=0,
             service_tier="auto",
             stop="string",
             store=True,
@@ -418,7 +418,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
             presence_penalty=-2,
             reasoning_effort="low",
             response_format={"type": "text"},
-            seed=-9007199254740991,
+            seed=0,
             service_tier="auto",
             stop="string",
             store=True,
diff --git a/tests/api_resources/test_completions.py b/tests/api_resources/test_completions.py
index ad2679cabe..9ec503c1e3 100644
--- a/tests/api_resources/test_completions.py
+++ b/tests/api_resources/test_completions.py
@@ -38,7 +38,7 @@ def test_method_create_with_all_params_overload_1(self, client: OpenAI) -> None:
             max_tokens=16,
             n=1,
             presence_penalty=-2,
-            seed=-9007199254740991,
+            seed=0,
             stop="\n",
             stream=False,
             stream_options={"include_usage": True},
@@ -98,7 +98,7 @@ def test_method_create_with_all_params_overload_2(self, client: OpenAI) -> None:
             max_tokens=16,
             n=1,
             presence_penalty=-2,
-            seed=-9007199254740991,
+            seed=0,
             stop="\n",
             stream_options={"include_usage": True},
             suffix="test.",
@@ -160,7 +160,7 @@ async def test_method_create_with_all_params_overload_1(self, async_client: Asyn
             max_tokens=16,
             n=1,
             presence_penalty=-2,
-            seed=-9007199254740991,
+            seed=0,
             stop="\n",
             stream=False,
             stream_options={"include_usage": True},
@@ -220,7 +220,7 @@ async def test_method_create_with_all_params_overload_2(self, async_client: Asyn
             max_tokens=16,
             n=1,
             presence_penalty=-2,
-            seed=-9007199254740991,
+            seed=0,
             stop="\n",
             stream_options={"include_usage": True},
             suffix="test.",