From 55385257d0113840e1ea79dfe9636af36f912c34 Mon Sep 17 00:00:00 2001
From: Denis L
Date: Thu, 19 Oct 2023 23:49:45 +0800
Subject: [PATCH 1/2] Initial version of history

---
 src/llmsearch/webapp.py | 50 +++++++++++++++++++++++++++--------------
 1 file changed, 33 insertions(+), 17 deletions(-)

diff --git a/src/llmsearch/webapp.py b/src/llmsearch/webapp.py
index a38aa6a..d0036a3 100644
--- a/src/llmsearch/webapp.py
+++ b/src/llmsearch/webapp.py
@@ -68,10 +68,11 @@ def generate_response(question: str, _config: Config, _bundle, label_filter: str
 if args.cli_config_path:
     config_file = args.cli_config_path
 else:
-    config_file = st.sidebar.file_uploader(
-        "Select tempate to load", type=["yml", "yaml"]
-    )
+    config_file = st.sidebar.file_uploader("Select template to load", type=["yml", "yaml"])
 
+# Initialize state for historical results
+if "messages" not in st.session_state:
+    st.session_state["messages"] = []
 
 if config_file is not None:
     config = load_config(config_file)
@@ -82,29 +83,34 @@ def generate_response(question: str, _config: Config, _bundle, label_filter: str
 
     st.sidebar.write(f"**Model type:** {config.llm.type}")
-    st.sidebar.write(
-        f"**Document path**: {config.embeddings.document_settings[0].doc_path}"
-    )
+    st.sidebar.write(f"**Document path**: {config.embeddings.document_settings[0].doc_path}")
     st.sidebar.write(f"**Embedding path:** {config.embeddings.embeddings_path}")
-    st.sidebar.write(
-        f"**Max char size (semantic search):** {config.semantic_search.max_char_size}"
-    )
+    st.sidebar.write(f"**Max char size (semantic search):** {config.semantic_search.max_char_size}")
 
     label_filter = ""
     if config.embeddings.labels:
-        label_filter = st.sidebar.selectbox(label="Filter by label", options = ["-"] + config.embeddings.labels)
-        if label_filter is None or label_filter == '-':
+        label_filter = st.sidebar.selectbox(label="Filter by label", options=["-"] + config.embeddings.labels)
+        if label_filter is None or label_filter == "-":
             label_filter = ""
-    
+
     llm_bundle = get_bundle(config)
 
     text = st.chat_input("Enter text")
-    is_hyde = st.sidebar.checkbox(label = "Use HyDE (cost: 2 api calls)", value=llm_bundle.hyde_enabled)
+    is_hyde = st.sidebar.checkbox(label="Use HyDE (cost: 2 api calls)", value=llm_bundle.hyde_enabled)
     if text:
         # Dynamically switch hyde
         llm_bundle.hyde_enabled = is_hyde
-        output = generate_response(question=text, _bundle=llm_bundle, _config=config, label_filter = label_filter)
+        output = generate_response(question=text, _bundle=llm_bundle, _config=config, label_filter=label_filter)
+
+    # Take care of historical response
+    for h_response in st.session_state["messages"]:
+        with st.expander(label=f":question: **{h_response['question']}**", expanded=False):
+            st.markdown(f"##### {h_response['question']}")
+            st.write(h_response["response"])
+            st.markdown(f"\n---\n##### Search Quality Score: {h_response['quality']}")
+            st.markdown("##### Links")
+            for link in h_response["links"]:
+                st.write("\t* " + link, unsafe_allow_html=True)
 
     for source in output.semantic_search[::-1]:
         source_path = source.metadata.pop("source")
@@ -119,12 +125,22 @@ def generate_response(question: str, _config: Config, _bundle, label_filter: str
             st.text(f"\n\n{source.chunk_text}")
 
     if llm_bundle.hyde_enabled:
-        with st.expander(label=':octagonal_sign: **HyDE Reponse**', expanded=False):
+        with st.expander(label=":octagonal_sign: **HyDE Response**", expanded=False):
            st.write(output.question)
 
     with chat_message("assistant"):
         st.write(f"**Search results quality score: {output.average_score:.2f}**\n")
-        st.write(output.response)
+        st.write(output.response)  # Display the assistant response
+
+        # Add assistant response to chat history
+        st.session_state["messages"].append(
+            {
+                "question": text,
+                "response": output.response,
+                "links": [f'{s.chunk_link}' for s in output.semantic_search[::-1]],
+                "quality": f"{output.average_score:.2f}",
+            }
+        )
 
 else:
     st.info("Choose a configuration template to start...")

From d727c3a06ceaaa4cdfdac0c2fd016b2b7a32698f Mon Sep 17 00:00:00 2001
From: Denis L
Date: Fri, 20 Oct 2023 19:34:31 +0800
Subject: [PATCH 2/2] Fix webapp issues and update docs

---
 README.md                             |  6 +++--
 sample_templates/config_template.yaml |  2 +-
 src/llmsearch/webapp.py               | 32 ++++++++++++++++-----------
 3 files changed, 24 insertions(+), 16 deletions(-)

diff --git a/README.md b/README.md
index 29a583e..326c9b4 100644
--- a/README.md
+++ b/README.md
@@ -25,6 +25,7 @@ The purpose of this package is to offer a convenient question-answering system w
 * Supports the "Retrieve and Re-rank" strategy for semantic search, see - https://www.sbert.net/examples/applications/retrieve_rerank/README.html.
 * Supports HyDE (Hypothetical Document Embeddings) - https://arxiv.org/pdf/2212.10496.pdf
+    * WARNING: Enabling HyDE can significantly alter the quality of the results. Please make sure to read the paper before enabling.
 * Allows interaction with embedded documents, supporting the following models and methods (including locally hosted):
     * OpenAI models (ChatGPT 3.5/4 and Azure OpenAI).
@@ -83,7 +84,8 @@ pip install . # or `pip install -e .` for development
 
 To create a configuration file in YAML format, you can refer to the example template provided in `sample_templates/config_template.yaml`.
 
-The sample configuration file specifies how to load one of the supported locally hosted models, downloaded from Huggingface - https://huggingface.co/TheBloke/wizardLM-13B-1.0-GGML/resolve/main/WizardLM-13B-1.0.ggmlv3.q5_K_S.bin
+The sample configuration file specifies how to load one of the supported locally hosted models, downloaded from Huggingface -
+https://huggingface.co/TheBloke/airoboros-l2-13B-gpt4-1.4.1-GGUF/resolve/main/airoboros-l2-13b-gpt4-1.4.1.Q4_K_M.gguf
 
 As an alternative uncomment the llm section for OpenAI model.
 
@@ -130,7 +132,7 @@ llmsearch interact llm -c /path/to/config.yaml
 
 Based on the example configuration provided in the sample configuration file, the following actions will take place:
 
-- The system will load a quantized GGML model using the LlamaCpp framework. The model file is located at `/storage/llm/cache/WizardLM-13B-1.0-GGML/WizardLM-13B-1.0.ggmlv3.q5_K_S.bin`.
+- The system will load a quantized GGUF model using the LlamaCpp framework. The model file is located at `/storage/llm/cache/airoboros-l2-13b-gpt4-1.4.1.Q4_K_M.gguf`.
 - The model will be partially loaded into the GPU (30 layers) and partially into the CPU (remaining layers). The `n_gpu_layers` parameter can be adjusted according to the hardware limitations.
 - Additional LlamaCpp specific parameters specified in `model_kwargs` from the `llm->params` section will be passed to the model.
 - The system will query the embeddings database using hybrid search algorithm using sparse and dense embeddings. It will provide the most relevant context from different documents, up to a maximum context size of 4096 characters (`max_char_size` in `semantic_search`).
diff --git a/sample_templates/config_template.yaml b/sample_templates/config_template.yaml
index ed37562..0945f9a 100644
--- a/sample_templates/config_template.yaml
+++ b/sample_templates/config_template.yaml
@@ -69,7 +69,7 @@ persist_response_db_path: "/path/to/responses.db" # optional sqlite database fi
 llm:
   type: llamacpp
   params:
-    model_path: /storage/llm/cache/WizardLM-13B-1.0-GGML/WizardLM-13B-1.0.ggmlv3.q5_K_S.bin
+    model_path: /storage/llm/cache/airoboros-l2-13b-gpt4-1.4.1.Q4_K_M.gguf
     prompt_template: |
       ### Instruction:
       Use the following pieces of context to answer the question at the end. If answer isn't in the context, say that you don't know, don't try to make up an answer.
diff --git a/src/llmsearch/webapp.py b/src/llmsearch/webapp.py
index d0036a3..be39ca2 100644
--- a/src/llmsearch/webapp.py
+++ b/src/llmsearch/webapp.py
@@ -54,7 +54,7 @@ def get_bundle(config):
 
 
 @st.cache_data
-def generate_response(question: str, _config: Config, _bundle, label_filter: str = ""):
+def generate_response(question: str, use_hyde: bool, _config: Config, _bundle, label_filter: str = ""):
     # _config and _bundle are under scored so paratemeters aren't hashed
     output = get_and_parse_response(query=question, config=_config, llm_bundle=_bundle, label=label_filter)
 
@@ -74,6 +74,7 @@ def generate_response(question: str, _config: Config, _bundle, label_filter: str
 if "messages" not in st.session_state:
     st.session_state["messages"] = []
 
+
 if config_file is not None:
     config = load_config(config_file)
 
@@ -100,14 +101,28 @@ def generate_response(question: str, _config: Config, _bundle, label_filter: str
     if text:
         # Dynamically switch hyde
         llm_bundle.hyde_enabled = is_hyde
-        output = generate_response(question=text, _bundle=llm_bundle, _config=config, label_filter=label_filter)
+        output = generate_response(
+            question=text,
+            use_hyde=llm_bundle.hyde_enabled,
+            _bundle=llm_bundle,
+            _config=config,
+            label_filter=label_filter,
+        )
 
-    # Take care of historical response
+        # Add assistant response to chat history
+        st.session_state["messages"].append(
+            {
+                "question": text,
+                "response": output.response,
+                "links": [f'{s.chunk_link}' for s in output.semantic_search[::-1]],
+                "quality": f"{output.average_score:.2f}",
+            }
+        )
     for h_response in st.session_state["messages"]:
         with st.expander(label=f":question: **{h_response['question']}**", expanded=False):
             st.markdown(f"##### {h_response['question']}")
             st.write(h_response["response"])
-            st.markdown(f"\n---\n##### Search Quality Score: {h_response['quality']}")
+            st.markdown(f"\n---\n##### Search Quality Score: {h_response['quality']}")
             st.markdown("##### Links")
             for link in h_response["links"]:
                 st.write("\t* " + link, unsafe_allow_html=True)
@@ -132,15 +147,6 @@ def generate_response(question: str, _config: Config, _bundle, label_filter: str
         st.write(f"**Search results quality score: {output.average_score:.2f}**\n")
         st.write(output.response)  # Display the assistant response
 
-        # Add assistant response to chat history
-        st.session_state["messages"].append(
-            {
-                "question": text,
-                "response": output.response,
-                "links": [f'{s.chunk_link}' for s in output.semantic_search[::-1]],
-                "quality": f"{output.average_score:.2f}",
-            }
-        )
 
 else:
     st.info("Choose a configuration template to start...")
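Note on the pattern these two patches introduce: the chat history lives in `st.session_state`, which is the only state Streamlit preserves across the script re-runs triggered by each user interaction, and `use_hyde` is added to `generate_response` as a plain (non-underscored) argument, presumably so that `st.cache_data` keys the cache on the HyDE toggle even though `_bundle` and `_config` are excluded from hashing. The sketch below is a minimal, self-contained illustration of that pattern, not code from llm-search: `answer_question` and its return shape are hypothetical stand-ins for the real `generate_response` pipeline.

```python
# Hypothetical, runnable sketch of the session-state chat history pattern.
# `answer_question` is a placeholder for llm-search's retrieval + LLM call.
import streamlit as st


@st.cache_data
def answer_question(question: str, use_hyde: bool) -> dict:
    # Placeholder answer; a real implementation would run semantic search + an LLM.
    return {
        "response": f"Echo ({'HyDE' if use_hyde else 'plain'}): {question}",
        "links": ["doc://example#chunk-1"],
        "quality": "0.85",
    }


# Initialize the history once per browser session; it survives script re-runs.
if "messages" not in st.session_state:
    st.session_state["messages"] = []

use_hyde = st.sidebar.checkbox("Use HyDE", value=False)
text = st.chat_input("Enter text")

if text:
    output = answer_question(text, use_hyde)
    # Store the finished turn so it can be re-rendered on the next re-run.
    st.session_state["messages"].append({"question": text, **output})

# Render the stored turns in collapsed expanders, oldest first.
for turn in st.session_state["messages"]:
    with st.expander(f":question: **{turn['question']}**", expanded=False):
        st.write(turn["response"])
        st.markdown(f"Search Quality Score: {turn['quality']}")
        for link in turn["links"]:
            st.write("* " + link)
```

Run with `streamlit run sketch.py`; because the question and the `use_hyde` flag are both hashable arguments, toggling HyDE produces a separate cache entry instead of returning a stale cached answer.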