Merge pull request #59 from snexus/feature-web-chat-history

Add chat history in the web app
snexus authored Oct 22, 2023
2 parents 676c8e7 + d727c3a commit f37e0ad
Showing 3 changed files with 45 additions and 21 deletions.
6 changes: 4 additions & 2 deletions README.md
@@ -25,6 +25,7 @@ The purpose of this package is to offer a convenient question-answering system w
* Supports the "Retrieve and Re-rank" strategy for semantic search, see - https://www.sbert.net/examples/applications/retrieve_rerank/README.html.

* Supports HyDE (Hypothetical Document Embeddings) - https://arxiv.org/pdf/2212.10496.pdf
* WARNING: Enabling HyDE can significantly alter the quality of the results. Please make sure to read the paper before enabling it (a minimal sketch of the idea appears at the end of this excerpt).

* Allows interaction with embedded documents, supporting the following models and methods (including locally hosted):
* OpenAI models (ChatGPT 3.5/4 and Azure OpenAI).
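
For readers unfamiliar with HyDE, below is a minimal, hypothetical sketch of the idea only (not this package's implementation): the raw query is replaced by an LLM-generated "hypothetical document" before embedding, which is why it costs an extra LLM call and can shift result quality either way. The `generate_hypothetical_answer` stub and the sentence-transformers model are illustrative assumptions.

```python
from sentence_transformers import SentenceTransformer, util

def generate_hypothetical_answer(question: str) -> str:
    """Stand-in for an LLM call that writes a plausible (possibly wrong) answer."""
    return "A hypothetical passage answering: " + question

embedder = SentenceTransformer("all-MiniLM-L6-v2")
documents = [
    "To load a GGUF model, point model_path at the downloaded file.",
    "OpenAI models require an API key in the environment.",
]
doc_embeddings = embedder.encode(documents, convert_to_tensor=True)

question = "How do I load a locally hosted model?"
hyde_text = generate_hypothetical_answer(question)  # HyDE: embed this instead of the raw question
query_embedding = embedder.encode([hyde_text], convert_to_tensor=True)

print(util.semantic_search(query_embedding, doc_embeddings, top_k=1))
```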
@@ -83,7 +84,8 @@ pip install . # or `pip install -e .` for development

To create a configuration file in YAML format, you can refer to the example template provided in `sample_templates/config_template.yaml`.

The sample configuration file specifies how to load one of the supported locally hosted models, downloaded from Huggingface - https://huggingface.co/TheBloke/wizardLM-13B-1.0-GGML/resolve/main/WizardLM-13B-1.0.ggmlv3.q5_K_S.bin
The sample configuration file specifies how to load one of the supported locally hosted models, downloaded from Huggingface -
https://huggingface.co/TheBloke/airoboros-l2-13B-gpt4-1.4.1-GGUF/resolve/main/airoboros-l2-13b-gpt4-1.4.1.Q4_K_M.gguf
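
As an illustration only (assuming the optional `huggingface_hub` package is installed), the file above could also be fetched programmatically; the sketch below is not part of this package:

```python
from huggingface_hub import hf_hub_download

# Downloads the GGUF file into the local Hugging Face cache and returns its path
model_path = hf_hub_download(
    repo_id="TheBloke/airoboros-l2-13B-gpt4-1.4.1-GGUF",
    filename="airoboros-l2-13b-gpt4-1.4.1.Q4_K_M.gguf",
)
print(model_path)  # point model_path in the YAML config at this location
```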

As an alternative, uncomment the llm section for the OpenAI model.

@@ -130,7 +132,7 @@ llmsearch interact llm -c /path/to/config.yaml

Based on the example configuration provided in the sample configuration file, the following actions will take place:

- The system will load a quantized GGML model using the LlamaCpp framework. The model file is located at `/storage/llm/cache/WizardLM-13B-1.0-GGML/WizardLM-13B-1.0.ggmlv3.q5_K_S.bin`.
- The system will load a quantized GGUF model using the LlamaCpp framework. The model file is located at `/storage/llm/cache/airoboros-l2-13b-gpt4-1.4.1.Q4_K_M.gguf`.
- The model will be partially loaded into the GPU (30 layers) and partially into the CPU (remaining layers). The `n_gpu_layers` parameter can be adjusted according to the hardware limitations; a minimal loading sketch is shown after this list.
- Additional LlamaCpp specific parameters specified in `model_kwargs` from the `llm->params` section will be passed to the model.
- The system will query the embeddings database using a hybrid search algorithm that combines sparse and dense embeddings. It will provide the most relevant context from different documents, up to a maximum context size of 4096 characters (`max_char_size` in `semantic_search`).
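
For illustration, a roughly equivalent direct call with `llama-cpp-python` is sketched below, assuming the path and parameters from the sample config; this is not the package's internal loading code:

```python
from llama_cpp import Llama

llm = Llama(
    model_path="/storage/llm/cache/airoboros-l2-13b-gpt4-1.4.1.Q4_K_M.gguf",
    n_gpu_layers=30,  # layers offloaded to the GPU; lower this on smaller GPUs
    n_ctx=4096,       # context window size
)

# A single completion using an instruction-style prompt similar to the config template
out = llm("### Instruction:\nSay hello.\n\n### Response:\n", max_tokens=32)
print(out["choices"][0]["text"])
```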
2 changes: 1 addition & 1 deletion sample_templates/config_template.yaml
@@ -69,7 +69,7 @@ persist_response_db_path: "/path/to/responses.db" # optional sqlite database fi
llm:
type: llamacpp
params:
model_path: /storage/llm/cache/WizardLM-13B-1.0-GGML/WizardLM-13B-1.0.ggmlv3.q5_K_S.bin
model_path: /storage/llm/cache/airoboros-l2-13b-gpt4-1.4.1.Q4_K_M.gguf
prompt_template: |
### Instruction:
Use the following pieces of context to answer the question at the end. If the answer isn't in the context, say that you don't know; don't try to make up an answer.
58 changes: 40 additions & 18 deletions src/llmsearch/webapp.py
@@ -54,7 +54,7 @@ def get_bundle(config):


@st.cache_data
def generate_response(question: str, _config: Config, _bundle, label_filter: str = ""):
def generate_response(question: str, use_hyde: bool, _config: Config, _bundle, label_filter: str = ""):
# _config and _bundle are underscore-prefixed so these parameters aren't hashed by st.cache_data
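# use_hyde, by contrast, is part of the cache key, presumably so that toggling HyDE re-runs the query instead of returning a stale cached response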

output = get_and_parse_response(query=question, config=_config, llm_bundle=_bundle, label=label_filter)
@@ -68,9 +68,11 @@ def generate_response(question: str, _config: Config, _bundle, label_filter: str
if args.cli_config_path:
config_file = args.cli_config_path
else:
config_file = st.sidebar.file_uploader(
"Select template to load", type=["yml", "yaml"]
)
config_file = st.sidebar.file_uploader("Select template to load", type=["yml", "yaml"])

# Initialize state for historical results
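# (st.session_state persists across Streamlit reruns, so the history survives each new question within a session)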
if "messages" not in st.session_state:
st.session_state["messages"] = []


if config_file is not None:
@@ -82,29 +84,48 @@ def generate_response(question: str, _config: Config, _bundle, label_filter: str

st.sidebar.write(f"**Model type:** {config.llm.type}")

st.sidebar.write(
f"**Document path**: {config.embeddings.document_settings[0].doc_path}"
)
st.sidebar.write(f"**Document path**: {config.embeddings.document_settings[0].doc_path}")
st.sidebar.write(f"**Embedding path:** {config.embeddings.embeddings_path}")
st.sidebar.write(
f"**Max char size (semantic search):** {config.semantic_search.max_char_size}"
)
st.sidebar.write(f"**Max char size (semantic search):** {config.semantic_search.max_char_size}")
label_filter = ""
if config.embeddings.labels:
label_filter = st.sidebar.selectbox(label="Filter by label", options = ["-"] + config.embeddings.labels)
if label_filter is None or label_filter == '-':
label_filter = st.sidebar.selectbox(label="Filter by label", options=["-"] + config.embeddings.labels)
if label_filter is None or label_filter == "-":
label_filter = ""

llm_bundle = get_bundle(config)

text = st.chat_input("Enter text")
is_hyde = st.sidebar.checkbox(label = "Use HyDE (cost: 2 api calls)", value=llm_bundle.hyde_enabled)
is_hyde = st.sidebar.checkbox(label="Use HyDE (cost: 2 api calls)", value=llm_bundle.hyde_enabled)

if text:

# Dynamically switch HyDE
llm_bundle.hyde_enabled = is_hyde
output = generate_response(question=text, _bundle=llm_bundle, _config=config, label_filter = label_filter)
output = generate_response(
question=text,
use_hyde=llm_bundle.hyde_enabled,
_bundle=llm_bundle,
_config=config,
label_filter=label_filter,
)

# Add assistant response to chat history
st.session_state["messages"].append(
{
"question": text,
"response": output.response,
"links": [f'<a href="{s.chunk_link}">{s.chunk_link}</a>' for s in output.semantic_search[::-1]],
"quality": f"{output.average_score:.2f}",
}
)
for h_response in st.session_state["messages"]:
with st.expander(label=f":question: **{h_response['question']}**", expanded=False):
st.markdown(f"##### {h_response['question']}")
st.write(h_response["response"])
st.markdown(f"\n---\n##### Serrch Quality Score: {h_response['quality']}")
st.markdown("##### Links")
for link in h_response["links"]:
st.write("\t* " + link, unsafe_allow_html=True)

for source in output.semantic_search[::-1]:
source_path = source.metadata.pop("source")
@@ -119,12 +140,13 @@ def generate_response(question: str, _config: Config, _bundle, label_filter: str

st.text(f"\n\n{source.chunk_text}")
if llm_bundle.hyde_enabled:
with st.expander(label=':octagonal_sign: **HyDE Response**', expanded=False):
with st.expander(label=":octagonal_sign: **HyDE Response**", expanded=False):
st.write(output.question)

with chat_message("assistant"):
st.write(f"**Search results quality score: {output.average_score:.2f}**\n")
st.write(output.response)
st.write(output.response)  # show the most recent response in the chat area as well


else:
st.info("Choose a configuration template to start...")
