diff --git a/fern/docs/pages/manual/llms.mdx b/fern/docs/pages/manual/llms.mdx
index 8b56f758d..059fb594e 100644
--- a/fern/docs/pages/manual/llms.mdx
+++ b/fern/docs/pages/manual/llms.mdx
@@ -37,6 +37,7 @@ llm:
   mode: openai
 
 openai:
+  api_base: # Defaults to https://api.openai.com/v1
   api_key: # You could skip this configuration and use the OPENAI_API_KEY env var instead
   model: # Optional model to use. Default is "gpt-3.5-turbo"
          # Note: Open AI Models are listed here: https://platform.openai.com/docs/models
@@ -55,6 +56,24 @@ Navigate to http://localhost:8001 to use the Gradio UI or to http://localhost:80
 
 You'll notice the speed and quality of response is higher, given you are using OpenAI's servers for the heavy computations.
 
+### Using OpenAI compatible API
+
+Many tools, including [LocalAI](https://localai.io/) and [vLLM](https://docs.vllm.ai/en/latest/),
+support serving local models with an OpenAI compatible API. Even when overriding the `api_base`,
+using the `openai` mode doesn't allow you to use custom models. Instead, you should use the `openailike` mode:
+
+```yaml
+llm:
+  mode: openailike
+```
+
+This mode uses the same settings as the `openai` mode.
+
+As an example, you can follow the [vLLM quickstart guide](https://docs.vllm.ai/en/latest/getting_started/quickstart.html#openai-compatible-server)
+to run an OpenAI compatible server. Then, you can run PrivateGPT using the `settings-vllm.yaml` profile:
+
+`PGPT_PROFILES=vllm make run`
+
 ### Using AWS Sagemaker
 
 For a fully private & performant setup, you can choose to have both your LLM and Embeddings model deployed using Sagemaker.
@@ -82,4 +101,4 @@ or
 `PGPT_PROFILES=sagemaker poetry run python -m private_gpt`
 
 When the server is started it will print a log *Application startup complete*.
-Navigate to http://localhost:8001 to use the Gradio UI or to http://localhost:8001/docs (API section) to try the API.
\ No newline at end of file
+Navigate to http://localhost:8001 to use the Gradio UI or to http://localhost:8001/docs (API section) to try the API.
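Before running `PGPT_PROFILES=vllm make run` as the docs above suggest, it is worth confirming that the local OpenAI-compatible server is actually reachable. A minimal sanity-check sketch, assuming the official `openai` Python client (v1.x) and the values from the `settings-vllm.yaml` profile added further down in this patch (base URL `http://localhost:8000/v1`, placeholder key `EMPTY`, model `facebook/opt-125m`):

```python
# Sanity check for an OpenAI-compatible server (e.g. vLLM) before
# pointing PrivateGPT's `openailike` mode at it.
# Assumes: openai>=1.0 installed and vLLM serving facebook/opt-125m on port 8000.
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:8000/v1",  # matches openai.api_base in settings-vllm.yaml
    api_key="EMPTY",                      # vLLM ignores the key, but the client requires one
)

response = client.completions.create(
    model="facebook/opt-125m",
    prompt="PrivateGPT is",
    max_tokens=16,
)
print(response.choices[0].text)
```

If this prints a completion, the server side is fine and any remaining issues are in the PrivateGPT configuration.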
diff --git a/private_gpt/components/llm/llm_component.py b/private_gpt/components/llm/llm_component.py
index 45c7f8186..d6a335f8c 100644
--- a/private_gpt/components/llm/llm_component.py
+++ b/private_gpt/components/llm/llm_component.py
@@ -62,7 +62,21 @@ def __init__(self, settings: Settings) -> None:
 
                 openai_settings = settings.openai
                 self.llm = OpenAI(
-                    api_key=openai_settings.api_key, model=openai_settings.model
+                    api_base=openai_settings.api_base,
+                    api_key=openai_settings.api_key,
+                    model=openai_settings.model,
+                )
+            case "openailike":
+                from llama_index.llms import OpenAILike
+
+                openai_settings = settings.openai
+                self.llm = OpenAILike(
+                    api_base=openai_settings.api_base,
+                    api_key=openai_settings.api_key,
+                    model=openai_settings.model,
+                    is_chat_model=True,
+                    max_tokens=None,
+                    api_version="",
                 )
             case "mock":
                 self.llm = MockLLM()
diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py
index 06d8a70bd..7c58a762e 100644
--- a/private_gpt/settings/settings.py
+++ b/private_gpt/settings/settings.py
@@ -81,7 +81,7 @@ class DataSettings(BaseModel):
 
 
 class LLMSettings(BaseModel):
-    mode: Literal["local", "openai", "sagemaker", "mock"]
+    mode: Literal["local", "openai", "openailike", "sagemaker", "mock"]
     max_new_tokens: int = Field(
         256,
         description="The maximum number of token that the LLM is authorized to generate in one completion.",
@@ -156,6 +156,10 @@ class SagemakerSettings(BaseModel):
 
 
 class OpenAISettings(BaseModel):
+    api_base: str = Field(
+        None,
+        description="Base URL of OpenAI API. Example: 'https://api.openai.com/v1'.",
+    )
     api_key: str
     model: str = Field(
         "gpt-3.5-turbo",
diff --git a/settings-vllm.yaml b/settings-vllm.yaml
new file mode 100644
index 000000000..c3907f29d
--- /dev/null
+++ b/settings-vllm.yaml
@@ -0,0 +1,14 @@
+llm:
+  mode: openailike
+
+embedding:
+  mode: local
+  ingest_mode: simple
+
+local:
+  embedding_hf_model_name: BAAI/bge-small-en-v1.5
+
+openai:
+  api_base: http://localhost:8000/v1
+  api_key: EMPTY
+  model: facebook/opt-125m
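Taken together, the new `openailike` branch amounts to constructing llama-index's `OpenAILike` wrapper from the shared `openai` settings block. A standalone sketch of the equivalent call, assuming a llama-index version where `OpenAILike` is importable from `llama_index.llms` (as in the patch) and using the `settings-vllm.yaml` values above:

```python
# Standalone sketch of what the new `openailike` branch in LLMComponent
# ends up constructing, with the values from settings-vllm.yaml filled in.
# Assumes a llama-index version where OpenAILike lives in llama_index.llms.
from llama_index.llms import OpenAILike

llm = OpenAILike(
    api_base="http://localhost:8000/v1",
    api_key="EMPTY",
    model="facebook/opt-125m",
    is_chat_model=True,   # talk to the chat completions endpoint, as PrivateGPT expects
    max_tokens=None,      # defer the completion length to the server's own default
    api_version="",       # not an Azure deployment, so no API version is needed
)

print(llm.complete("PrivateGPT is"))
```

Setting `is_chat_model=True` tells llama-index to route requests through the chat API rather than the plain completions API, which is how PrivateGPT drives its LLMs; `max_tokens=None` and the empty `api_version` simply avoid sending OpenAI-specific defaults to a server that may not understand them.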