From 2d27a9f956d672cb1fe715cf0acdd35c37f378a5 Mon Sep 17 00:00:00 2001
From: Matthew Hill
Date: Tue, 26 Dec 2023 04:26:08 -0500
Subject: [PATCH] feat(llm): Add openailike llm mode (#1447)

This mode behaves the same as the openai mode, except that it allows
setting custom models not supported by OpenAI. It can be used with any
tool that serves models from an OpenAI compatible API.

Implements #1424
---
 fern/docs/pages/manual/llms.mdx             | 21 ++++++++++++++++++++-
 private_gpt/components/llm/llm_component.py | 16 +++++++++++++++-
 private_gpt/settings/settings.py            |  6 +++++-
 settings-vllm.yaml                          | 14 ++++++++++++++
 4 files changed, 54 insertions(+), 3 deletions(-)
 create mode 100644 settings-vllm.yaml

diff --git a/fern/docs/pages/manual/llms.mdx b/fern/docs/pages/manual/llms.mdx
index 8b56f758d..059fb594e 100644
--- a/fern/docs/pages/manual/llms.mdx
+++ b/fern/docs/pages/manual/llms.mdx
@@ -37,6 +37,7 @@ llm:
   mode: openai
 
 openai:
+  api_base: # Defaults to https://api.openai.com/v1
   api_key: # You could skip this configuration and use the OPENAI_API_KEY env var instead
   model: # Optional model to use. Default is "gpt-3.5-turbo"
          # Note: Open AI Models are listed here: https://platform.openai.com/docs/models
@@ -55,6 +56,24 @@ Navigate to http://localhost:8001 to use the Gradio UI or to http://localhost:80
 
 You'll notice the speed and quality of response is higher, given you are using OpenAI's servers for the heavy computations.
 
+### Using OpenAI compatible API
+
+Many tools, including [LocalAI](https://localai.io/) and [vLLM](https://docs.vllm.ai/en/latest/),
+support serving local models with an OpenAI compatible API. Even when overriding the `api_base`,
+using the `openai` mode doesn't allow you to use custom models. Instead, you should use the `openailike` mode:
+
+```yaml
+llm:
+  mode: openailike
+```
+
+This mode uses the same settings as the `openai` mode.
+
+As an example, you can follow the [vLLM quickstart guide](https://docs.vllm.ai/en/latest/getting_started/quickstart.html#openai-compatible-server)
+to run an OpenAI compatible server. Then, you can run PrivateGPT using the `settings-vllm.yaml` profile:
+
+`PGPT_PROFILES=vllm make run`
+
 ### Using AWS Sagemaker
 
 For a fully private & performant setup, you can choose to have both your LLM and Embeddings model deployed using Sagemaker.
@@ -82,4 +101,4 @@ or
 `PGPT_PROFILES=sagemaker poetry run python -m private_gpt`
 
 When the server is started it will print a log *Application startup complete*.
-Navigate to http://localhost:8001 to use the Gradio UI or to http://localhost:8001/docs (API section) to try the API.
\ No newline at end of file
+Navigate to http://localhost:8001 to use the Gradio UI or to http://localhost:8001/docs (API section) to try the API.
diff --git a/private_gpt/components/llm/llm_component.py b/private_gpt/components/llm/llm_component.py
index 45c7f8186..d6a335f8c 100644
--- a/private_gpt/components/llm/llm_component.py
+++ b/private_gpt/components/llm/llm_component.py
@@ -62,7 +62,21 @@ def __init__(self, settings: Settings) -> None:
 
                 openai_settings = settings.openai
                 self.llm = OpenAI(
-                    api_key=openai_settings.api_key, model=openai_settings.model
+                    api_base=openai_settings.api_base,
+                    api_key=openai_settings.api_key,
+                    model=openai_settings.model,
                 )
+            case "openailike":
+                from llama_index.llms import OpenAILike
+
+                openai_settings = settings.openai
+                self.llm = OpenAILike(
+                    api_base=openai_settings.api_base,
+                    api_key=openai_settings.api_key,
+                    model=openai_settings.model,
+                    is_chat_model=True,
+                    max_tokens=None,
+                    api_version="",
+                )
             case "mock":
                 self.llm = MockLLM()
diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py
index 06d8a70bd..7c58a762e 100644
--- a/private_gpt/settings/settings.py
+++ b/private_gpt/settings/settings.py
@@ -81,7 +81,7 @@ class DataSettings(BaseModel):
 
 class LLMSettings(BaseModel):
-    mode: Literal["local", "openai", "sagemaker", "mock"]
+    mode: Literal["local", "openai", "openailike", "sagemaker", "mock"]
     max_new_tokens: int = Field(
         256,
         description="The maximum number of token that the LLM is authorized to generate in one completion.",
     )
@@ -156,6 +156,10 @@ class SagemakerSettings(BaseModel):
 
 
 class OpenAISettings(BaseModel):
+    api_base: str = Field(
+        None,
+        description="Base URL of OpenAI API. Example: 'https://api.openai.com/v1'.",
+    )
     api_key: str
     model: str = Field(
         "gpt-3.5-turbo",
diff --git a/settings-vllm.yaml b/settings-vllm.yaml
new file mode 100644
index 000000000..c3907f29d
--- /dev/null
+++ b/settings-vllm.yaml
@@ -0,0 +1,14 @@
+llm:
+  mode: openailike
+
+embedding:
+  mode: local
+  ingest_mode: simple
+
+local:
+  embedding_hf_model_name: BAAI/bge-small-en-v1.5
+
+openai:
+  api_base: http://localhost:8000/v1
+  api_key: EMPTY
+  model: facebook/opt-125m
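
For reviewers, a quick way to sanity-check the new mode outside of PrivateGPT is to drive the same `OpenAILike` wiring directly. This is a sketch, not part of the patch: it assumes a vLLM OpenAI compatible server is already running with `facebook/opt-125m` at `http://localhost:8000/v1` (matching `settings-vllm.yaml`), and that the pinned llama-index version exposes `OpenAILike` under `llama_index.llms`, exactly as the import in `llm_component.py` does.

```python
# Sketch (not part of the patch): exercise the same OpenAILike setup that
# llm_component.py builds in the "openailike" case, pointed at a local
# vLLM server. Assumes vLLM is already serving facebook/opt-125m at
# http://localhost:8000/v1, per settings-vllm.yaml.
from llama_index.llms import OpenAILike

llm = OpenAILike(
    api_base="http://localhost:8000/v1",
    api_key="EMPTY",  # vLLM doesn't check the key; the client just needs a value
    model="facebook/opt-125m",
    is_chat_model=True,  # route requests through the chat completions endpoint
    max_tokens=None,
    api_version="",
)

# Send a single completion request to the local server and print the response.
print(llm.complete("What is PrivateGPT?"))
```

Within PrivateGPT itself, the same path is exercised by starting the server with `PGPT_PROFILES=vllm make run` and chatting through the Gradio UI at http://localhost:8001.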