
Commit

Properly clear caches when clearing conversation history
DL committed Aug 30, 2024
1 parent bd0cbf6 commit 9c7378b
Showing 5 changed files with 27 additions and 2 deletions.
2 changes: 1 addition & 1 deletion requirements.txt
@@ -1,4 +1,4 @@
llama-cpp-python==0.2.88
llama-cpp-python==0.2.76
chromadb~=0.5.5
langchain~=0.2.14
langchain-community~=0.2.12
7 changes: 7 additions & 0 deletions sample_templates/generic/config_template.yaml
@@ -65,6 +65,13 @@ semantic_search:
enabled: True
model: "bge" # for `BAAI/bge-reranker-base` or "marco" for cross-encoder/ms-marco-MiniLM-L-6-v2

# Optionally enable conversation history settings (default False)
conversation_history_settings:
  enabled: True
  max_history_length: 3
  rewrite_query: True



persist_response_db_path: "/path/to/responses.db" # optional sqlite database filename. Allows saving responses offline to sqlite for future analysis.

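For orientation: the new block is read as config.semantic_search.conversation_history_settings in utils.py below, so in a complete config it presumably nests under semantic_search. A minimal sketch under that assumption (the inline comments are inferred, not taken from the source):

    semantic_search:
      # ... embeddings, reranker, hyde, etc. ...
      conversation_history_settings:
        enabled: True          # history tracking is off by default
        max_history_length: 3  # presumably the number of past exchanges kept
        rewrite_query: True    # presumably rewrites the query using stored history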
5 changes: 5 additions & 0 deletions sample_templates/pdf_library.yaml
@@ -34,3 +34,8 @@ semantic_search:
query_prefix: "query: "
hyde:
  enabled: True

conversation_history_settings:
  enabled: True
  max_history_length: 3
  rewrite_query: True
5 changes: 4 additions & 1 deletion src/llmsearch/utils.py
@@ -17,6 +17,7 @@
from langchain_core.runnables.base import RunnableSequence
from langchain_core.output_parsers.string import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.caches import BaseCache, InMemoryCache

CHAIN_TYPE = "stuff"

@@ -35,6 +36,7 @@ class LLMBundle:
    multiquery_chain: Optional[RunnableSequence] = None
    multiquery_enabled: bool = False
    history_contextualization_chain: Optional[RunnableSequence] = None
    llm_cache: Optional[BaseCache] = None


def set_cache_folder(cache_folder_root: str):
@@ -115,7 +117,8 @@ def get_llm_bundle(config: Config) -> LLMBundle:
        multiquery_chain=multiquery_chain,
        multiquery_enabled=config.semantic_search.multiquery.enabled,
        conversation_history_settings=config.semantic_search.conversation_history_settings,
        history_contextualization_chain = history_contextualization_chain
        history_contextualization_chain = history_contextualization_chain,
        llm_cache=InMemoryCache()
    )


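To make the intent of the new field concrete, here is a reduced, standalone sketch (an illustrative stand-in, not the project's actual class) of how an LLMBundle-style object carries its own InMemoryCache that can later be registered globally and cleared:

    from dataclasses import dataclass
    from typing import Optional

    from langchain.globals import set_llm_cache
    from langchain_core.caches import BaseCache, InMemoryCache


    @dataclass
    class Bundle:  # illustrative stand-in for LLMBundle
        llm_cache: Optional[BaseCache] = None


    bundle = Bundle(llm_cache=InMemoryCache())

    # Register the bundle's cache as langchain's global LLM cache
    # (webapp.py below does this with the real bundle).
    set_llm_cache(bundle.llm_cache)

    # When conversation history is reset, empty the cache as well so that
    # previously cached completions are not replayed against a fresh context.
    bundle.llm_cache.clear()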
10 changes: 10 additions & 0 deletions src/llmsearch/webapp.py
@@ -12,6 +12,7 @@
from dotenv import load_dotenv
from loguru import logger
from streamlit import chat_message
from langchain.globals import set_llm_cache

from llmsearch.config import Config
from llmsearch.chroma import VectorStoreChroma
@@ -160,6 +161,12 @@ def unload_model():
def clear_chat_history():
    if st.session_state["llm_bundle"] is not None:
        st.session_state["llm_bundle"].conversation_history_settings.history = []

        # Clear LLM Cache
        st.session_state["llm_bundle"].llm_cache.clear()

        # Clear Streamlit Cache
        st.cache_data.clear()

@st.cache_data
def generate_response(
@@ -215,7 +222,10 @@ def reload_model(doc_config_path: str, model_config_file: str):
update_embeddings_button = st.button(
"Generate", on_click=generate_index, args=(config,), type="secondary"
)


logger.debug("Setting LLM Cache")
set_llm_cache(st.session_state["llm_bundle"].llm_cache)
st.session_state["disable_load"] = False


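Taken together, clearing a conversation in the webapp now touches three layers: the stored history, the bundle's LLM cache, and Streamlit's st.cache_data memoization of generate_response. A self-contained sketch of that pattern (simplified names, not the actual app code):

    import streamlit as st
    from langchain.globals import set_llm_cache
    from langchain_core.caches import InMemoryCache

    # One cache per session, registered as the global LLM cache.
    if "llm_cache" not in st.session_state:
        st.session_state["llm_cache"] = InMemoryCache()
    set_llm_cache(st.session_state["llm_cache"])

    if "history" not in st.session_state:
        st.session_state["history"] = []


    def clear_chat_history():
        st.session_state["history"] = []       # drop stored conversation turns
        st.session_state["llm_cache"].clear()  # drop cached LLM completions
        st.cache_data.clear()                  # drop @st.cache_data results


    st.button("Clear chat history", on_click=clear_chat_history)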
