feat(llm): Add openailike llm mode #1447

Merged · 2 commits · Dec 26, 2023
21 changes: 20 additions & 1 deletion fern/docs/pages/manual/llms.mdx
@@ -37,6 +37,7 @@ llm:
  mode: openai

openai:
  api_base: <openai-api-base-url> # Defaults to https://api.openai.com/v1
  api_key: <your_openai_api_key> # You can skip this setting and use the OPENAI_API_KEY env var instead
  model: <openai_model_to_use> # Optional model to use. Default is "gpt-3.5-turbo"
  # Note: OpenAI models are listed here: https://platform.openai.com/docs/models
@@ -55,6 +56,24 @@ Navigate to http://localhost:8001 to use the Gradio UI or to http://localhost:80
You'll notice the speed and quality of response is higher, given you are using OpenAI's servers for the heavy
computations.

### Using an OpenAI-compatible API

Many tools, including [LocalAI](https://localai.io/) and [vLLM](https://docs.vllm.ai/en/latest/),
support serving local models with an OpenAI-compatible API. The `openai` mode does not let you use
custom models, even when `api_base` is overridden; use the `openailike` mode instead:

```yaml
llm:
  mode: openailike
```

This mode uses the same settings as the `openai` mode.

As an example, you can follow the [vLLM quickstart guide](https://docs.vllm.ai/en/latest/getting_started/quickstart.html#openai-compatible-server)
to run an OpenAI compatible server. Then, you can run PrivateGPT using the `settings-vllm.yaml` profile:

`PGPT_PROFILES=vllm make run`
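As a quick sanity check once such a server is up, you can send a chat-completions request to the OpenAI-compatible endpoint directly. The sketch below only builds the request (it does not send it) and assumes a vLLM server at `http://localhost:8000/v1` serving `facebook/opt-125m`, matching the `settings-vllm.yaml` profile in this PR:

```python
import json
import urllib.request

# OpenAI-compatible endpoint exposed by the local vLLM server (assumed URL).
api_base = "http://localhost:8000/v1"

# Standard chat-completions payload; the model name must match what the
# server is actually serving.
payload = {
    "model": "facebook/opt-125m",
    "messages": [{"role": "user", "content": "Hello!"}],
    "max_tokens": 32,
}

request = urllib.request.Request(
    f"{api_base}/chat/completions",
    data=json.dumps(payload).encode("utf-8"),
    headers={
        "Content-Type": "application/json",
        # vLLM accepts any bearer token by default, hence "EMPTY".
        "Authorization": "Bearer EMPTY",
    },
    method="POST",
)

# Against a live server: urllib.request.urlopen(request) returns the completion.
```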

### Using AWS Sagemaker

For a fully private & performant setup, you can choose to have both your LLM and Embeddings model deployed using Sagemaker.
@@ -82,4 +101,4 @@ or
`PGPT_PROFILES=sagemaker poetry run python -m private_gpt`

When the server is started it will print a log *Application startup complete*.
Navigate to http://localhost:8001 to use the Gradio UI or to http://localhost:8001/docs (API section) to try the API.
16 changes: 15 additions & 1 deletion private_gpt/components/llm/llm_component.py
@@ -62,7 +62,21 @@ def __init__(self, settings: Settings) -> None:

                openai_settings = settings.openai
                self.llm = OpenAI(
                    api_key=openai_settings.api_key, model=openai_settings.model
                    api_base=openai_settings.api_base,
                    api_key=openai_settings.api_key,
                    model=openai_settings.model,
                )
            case "openailike":
                from llama_index.llms import OpenAILike

                openai_settings = settings.openai
                self.llm = OpenAILike(
                    api_base=openai_settings.api_base,
                    api_key=openai_settings.api_key,
                    model=openai_settings.model,
                    is_chat_model=True,
                    max_tokens=None,
                    api_version="",
                )
            case "mock":
                self.llm = MockLLM()
6 changes: 5 additions & 1 deletion private_gpt/settings/settings.py
@@ -81,7 +81,7 @@ class DataSettings(BaseModel):


class LLMSettings(BaseModel):
    mode: Literal["local", "openai", "openailike", "sagemaker", "mock"]
    max_new_tokens: int = Field(
        256,
        description="The maximum number of tokens that the LLM is authorized to generate in one completion.",
@@ -156,6 +156,10 @@ class SagemakerSettings(BaseModel):


class OpenAISettings(BaseModel):
    api_base: str = Field(
        None,
        description="Base URL of OpenAI API. Example: 'https://api.openai.com/v1'.",
    )
    api_key: str
    model: str = Field(
        "gpt-3.5-turbo",
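Adding `"openailike"` to the `Literal` means pydantic rejects any other mode at settings-load time. The same membership check can be sketched with stdlib `typing` helpers (a simplified illustration, not the pydantic validation machinery itself):

```python
from typing import Literal, get_args

# Mirrors the Literal from LLMSettings after this PR.
LLMMode = Literal["local", "openai", "openailike", "sagemaker", "mock"]


def validate_mode(mode: str) -> str:
    """Reject modes outside the allowed Literal, as pydantic would."""
    allowed = get_args(LLMMode)
    if mode not in allowed:
        raise ValueError(f"mode must be one of {allowed}, got {mode!r}")
    return mode
```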
14 changes: 14 additions & 0 deletions settings-vllm.yaml
@@ -0,0 +1,14 @@
llm:
  mode: openailike

embedding:
  mode: local
  ingest_mode: simple

local:
  embedding_hf_model_name: BAAI/bge-small-en-v1.5

openai:
  api_base: http://localhost:8000/v1
  api_key: EMPTY
  model: facebook/opt-125m
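Profiles like this one are layered on top of the default settings when named in `PGPT_PROFILES`. A rough sketch of that layering, assuming a simple recursive key-wise merge of profile values over defaults (the actual PrivateGPT loader may differ in detail):

```python
def active_profiles(env: dict[str, str]) -> list[str]:
    """Parse PGPT_PROFILES, a comma-separated list of profile names."""
    raw = env.get("PGPT_PROFILES", "")
    return [p for p in raw.split(",") if p]


def merge_settings(base: dict, override: dict) -> dict:
    """Recursively overlay profile values on top of the defaults."""
    merged = dict(base)
    for key, value in override.items():
        if isinstance(value, dict) and isinstance(merged.get(key), dict):
            merged[key] = merge_settings(merged[key], value)
        else:
            merged[key] = value
    return merged


# Defaults here are illustrative, not the real settings.yaml contents.
defaults = {"llm": {"mode": "local"}, "openai": {"model": "gpt-3.5-turbo"}}
vllm_profile = {
    "llm": {"mode": "openailike"},
    "openai": {"api_base": "http://localhost:8000/v1", "model": "facebook/opt-125m"},
}

settings = merge_settings(defaults, vllm_profile)
```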