Skip to content

Commit

Permalink
provide long connection (reasoning friendly) socket defaults in openai http client (#1410)
Browse files Browse the repository at this point in the history

Co-authored-by: jjallaire <[email protected]>
  • Loading branch information
jjallaire-aisi and jjallaire authored Feb 26, 2025
1 parent f914e68 commit 9c9bd5b
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
- Reasoning: `reasoning_tokens` for setting maximum reasoning tokens (currently only supported by Claude 3.7 Sonnet)
- Reasoning: `reasoning_history` can now be specified as "none", "all", "last", or "auto" (which yields a provider specific recommended default).
- Web Browser: [Various improvements](https://github.com/UKGovernmentBEIS/inspect_ai/pull/1314) to performance and robustness along with several bug fixes.
- OpenAI: Provide long connection (reasoning friendly) socket defaults in http client
- OpenAI: Capture `reasoning_tokens` when reported.
- OpenAI: Retry on rate limit requests with "Request too large".
- OpenAI: Tolerate `None` for assistant content (can happen when there is a refusal).
Expand Down
45 changes: 45 additions & 0 deletions src/inspect_ai/model/_providers/openai.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
import os
import socket
from logging import getLogger
from typing import Any

import httpx
from openai import (
DEFAULT_CONNECTION_LIMITS,
DEFAULT_TIMEOUT,
APIConnectionError,
APITimeoutError,
AsyncAzureOpenAI,
Expand Down Expand Up @@ -102,6 +106,9 @@ def __init__(
],
)

# create async http client
http_client = OpenAIAsyncHttpxClient()

# azure client
if self.is_azure():
# resolve base_url
Expand All @@ -126,6 +133,7 @@ def __init__(
max_retries=(
config.max_retries if config.max_retries else DEFAULT_MAX_RETRIES
),
http_client=http_client,
**model_args,
)
else:
Expand All @@ -135,6 +143,7 @@ def __init__(
max_retries=(
config.max_retries if config.max_retries else DEFAULT_MAX_RETRIES
),
http_client=http_client,
**model_args,
)

Expand Down Expand Up @@ -350,3 +359,39 @@ def handle_bad_request(self, e: BadRequestError) -> ModelOutput | Exception:
)
else:
return e


class OpenAIAsyncHttpxClient(httpx.AsyncClient):
    """Custom async client that deals better with long running async requests.

    Based on the Anthropic DefaultAsyncHttpClient implementation that they
    released along with Claude 3.7, as well as the OpenAI
    DefaultAsyncHttpxClient.
    """

    def __init__(self, **kwargs: Any) -> None:
        # This is based on the openai DefaultAsyncHttpxClient:
        # https://github.com/openai/openai-python/commit/347363ed67a6a1611346427bb9ebe4becce53f7e
        kwargs.setdefault("timeout", DEFAULT_TIMEOUT)
        kwargs.setdefault("limits", DEFAULT_CONNECTION_LIMITS)
        kwargs.setdefault("follow_redirects", True)

        # This is based on the anthropic changes for Claude 3.7:
        # https://github.com/anthropics/anthropic-sdk-python/commit/c5387e69e799f14e44006ea4e54fdf32f2f74393#diff-3acba71f89118b06b03f2ba9f782c49ceed5bb9f68d62727d929f1841b61d12bR1387-R1403

        # Enable TCP keepalives so that idle connections aren't dropped by
        # intermediaries while waiting on long running reasoning requests.
        # Not every platform exposes every TCP keepalive constant (e.g.
        # TCP_KEEPIDLE is absent on macOS, and TCP_KEEPINTVL/TCP_KEEPCNT can
        # be missing on some platforms/Python builds), so resolve each one
        # with getattr rather than referencing it directly.
        socket_options: list[tuple[int, int, int]] = [
            (socket.SOL_SOCKET, socket.SO_KEEPALIVE, True),
        ]
        for option_name, option_value in (
            ("TCP_KEEPINTVL", 60),  # seconds between keepalive probes
            ("TCP_KEEPCNT", 5),  # failed probes before dropping connection
            ("TCP_KEEPIDLE", 60),  # seconds of idle time before first probe
        ):
            option = getattr(socket, option_name, None)
            if option is not None:
                socket_options.append((socket.IPPROTO_TCP, option, option_value))

        # honor an explicitly passed transport; otherwise install one that
        # carries the keepalive socket options above
        kwargs.setdefault(
            "transport",
            httpx.AsyncHTTPTransport(
                limits=DEFAULT_CONNECTION_LIMITS,
                socket_options=socket_options,
            ),
        )

        super().__init__(**kwargs)

0 comments on commit 9c9bd5b

Please sign in to comment.