diff --git a/private_gpt/server/chat/chat_service.py b/private_gpt/server/chat/chat_service.py
index ae8cf0089..d47879d3a 100644
--- a/private_gpt/server/chat/chat_service.py
+++ b/private_gpt/server/chat/chat_service.py
@@ -100,6 +100,7 @@ def __init__(
             embed_model=embedding_component.embedding_model,
             show_progress=True,
         )
+        self.default_context_template = settings.rag.default_context_template
 
     def _chat_engine(
         self,
@@ -109,6 +110,10 @@ def _chat_engine(
     ) -> BaseChatEngine:
         settings = self.settings
         if use_context:
+            if self.default_context_template is not None:
+                context_template = self.default_context_template
+            else:
+                context_template = None
             vector_index_retriever = self.vector_store_component.get_retriever(
                 index=self.index,
                 context_filter=context_filter,
@@ -132,6 +137,7 @@ def _chat_engine(
                 retriever=vector_index_retriever,
                 llm=self.llm_component.llm,  # Takes no effect at the moment
                 node_postprocessors=node_postprocessors,
+                context_template=context_template,
             )
         else:
             return SimpleChatEngine.from_defaults(
diff --git a/private_gpt/settings/settings.py b/private_gpt/settings/settings.py
index 30d816b84..b0f6698e8 100644
--- a/private_gpt/settings/settings.py
+++ b/private_gpt/settings/settings.py
@@ -345,6 +345,13 @@ class RerankSettings(BaseModel):
 
 
 class RagSettings(BaseModel):
+    default_context_template: str | None = Field(
+        None,
+        description=(
+            "The default context template to use for the chat engine when using RAG. "
+            "If none is given - use the default system prompt (from the llama_index). "
+        ),
+    )
     similarity_top_k: int = Field(
         2,
         description="This value controls the number of documents returned by the RAG pipeline or considered for reranking if enabled.",
diff --git a/settings.yaml b/settings.yaml
index 8d882f73c..e45a1ba49 100644
--- a/settings.yaml
+++ b/settings.yaml
@@ -44,6 +44,11 @@ llm:
   temperature: 0.1      # The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual. (Default: 0.1)
 
 rag:
+  default_context_template: |
+    Context information is below.
+    --------------------
+    {context_str}
+    --------------------
   similarity_top_k: 2
   #This value controls how many "top" documents the RAG returns to use in the context.
   #similarity_value: 0.45
@@ -54,11 +59,11 @@ rag:
     top_n: 1
 
 clickhouse:
-  host: localhost
-  port: 8443
-  username: admin
-  password: clickhouse
-  database: embeddings
+    host: localhost
+    port: 8443
+    username: admin
+    password: clickhouse
+    database: embeddings
 
 llamacpp:
   llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
@@ -125,3 +130,4 @@ gemini:
   api_key: ${GOOGLE_API_KEY:}
   model: models/gemini-pro
   embedding_model: models/embedding-001
+