
Commit

openai streaming
DavidKoleczek committed Feb 8, 2025
1 parent 961c27d commit 84ead84
Showing 8 changed files with 661 additions and 49 deletions.
2 changes: 1 addition & 1 deletion .vscode/launch.json
@@ -5,7 +5,7 @@
     "version": "0.2.0",
     "configurations": [
         {
-            "name": "Python: Current File",
+            "name": "Python Debugger: Current File",
             "type": "debugpy",
             "request": "launch",
             "program": "${file}",
28 changes: 24 additions & 4 deletions poetry.lock

Some generated files are not rendered by default.

4 changes: 3 additions & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "not-again-ai"
-version = "0.16.1"
+version = "0.17.0"
 description = "Designed to once and for all collect all the little things that come up over and over again in AI projects and put them in one place."
 authors = [
     { name = "DaveCoDev", email = "[email protected]" }
@@ -70,6 +70,7 @@ nox-poetry = "*"

 [tool.poetry.group.test.dependencies]
 pytest = "*"
+pytest-asyncio = "*"
 pytest-cov = "*"
 pytest-randomly = "*"

@@ -153,6 +154,7 @@ filterwarnings = [
     # "ignore::DeprecationWarning:typer",
     "ignore::pytest.PytestUnraisableExceptionWarning"
 ]
+asyncio_mode = "auto"

 [tool.coverage.run]
 branch = true
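
Together with the pytest-asyncio dependency added above, asyncio_mode = "auto" lets pytest collect and run `async def` test functions directly, with no per-test @pytest.mark.asyncio marker. A minimal sketch of a test relying on that configuration (the module name and test body are illustrative, not part of this commit):

# tests/test_async_mode_sketch.py -- hypothetical module name
import asyncio


async def test_coroutine_runs_without_a_marker() -> None:
    # Under asyncio_mode = "auto", pytest-asyncio drives this coroutine;
    # sleep(0) simply yields control to the event loop once.
    await asyncio.sleep(0)
    assert True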
4 changes: 2 additions & 2 deletions src/not_again_ai/llm/chat_completion/__init__.py
@@ -1,4 +1,4 @@
-from not_again_ai.llm.chat_completion.interface import chat_completion
+from not_again_ai.llm.chat_completion.interface import chat_completion, chat_completion_stream
 from not_again_ai.llm.chat_completion.types import ChatCompletionRequest

-__all__ = ["ChatCompletionRequest", "chat_completion"]
+__all__ = ["ChatCompletionRequest", "chat_completion", "chat_completion_stream"]
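
With the re-export in place, both entry points are importable from the subpackage root rather than from interface.py directly; a one-line illustration (assuming the package is installed):

from not_again_ai.llm.chat_completion import chat_completion, chat_completion_stream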
32 changes: 29 additions & 3 deletions src/not_again_ai/llm/chat_completion/interface.py
@@ -1,9 +1,9 @@
-from collections.abc import Callable
+from collections.abc import AsyncGenerator, Callable
 from typing import Any

 from not_again_ai.llm.chat_completion.providers.ollama_api import ollama_chat_completion
-from not_again_ai.llm.chat_completion.providers.openai_api import openai_chat_completion
-from not_again_ai.llm.chat_completion.types import ChatCompletionRequest, ChatCompletionResponse
+from not_again_ai.llm.chat_completion.providers.openai_api import openai_chat_completion, openai_chat_completion_stream
+from not_again_ai.llm.chat_completion.types import ChatCompletionChunk, ChatCompletionRequest, ChatCompletionResponse

@@ -30,3 +30,29 @@ def chat_completion(
         return ollama_chat_completion(request, client)
     else:
         raise ValueError(f"Provider {provider} not supported")
+
+
+async def chat_completion_stream(
+    request: ChatCompletionRequest,
+    provider: str,
+    client: Callable[..., Any],
+) -> AsyncGenerator[ChatCompletionChunk, None]:
+    """Stream a chat completion response from the given provider. Currently supported providers:
+    - `openai` - OpenAI
+    - `azure_openai` - Azure OpenAI
+
+    Args:
+        request: Request parameter object
+        provider: The supported provider name
+        client: Client information; see the provider's implementation for what can be provided
+
+    Returns:
+        AsyncGenerator[ChatCompletionChunk, None]
+    """
+    request.stream = True
+    if provider == "openai" or provider == "azure_openai":
+        async for chunk in openai_chat_completion_stream(request, client):
+            yield chunk
+    else:
+        raise ValueError(f"Provider {provider} not supported")
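
For orientation, a sketch of how a caller might consume the new generator. The ChatCompletionRequest fields and the client value below are placeholders: the real request schema lives in types.py and the expected client shape in the OpenAI provider module, neither of which appears in this diff.

import asyncio

from not_again_ai.llm.chat_completion import chat_completion_stream
from not_again_ai.llm.chat_completion.types import ChatCompletionRequest


def get_openai_client() -> None:
    # Hypothetical stand-in: openai_api.py defines what `client` must actually be.
    return None


async def main() -> None:
    # Hypothetical field names; consult ChatCompletionRequest for the real schema.
    request = ChatCompletionRequest(
        model="gpt-4o-mini",
        messages=[{"role": "user", "content": "Say hello."}],
    )
    async for chunk in chat_completion_stream(request, provider="openai", client=get_openai_client):
        print(chunk)


asyncio.run(main())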