Merge pull request #59 from snexus/feature-web-chat-history

Add chat history in the web app
snexus authored Oct 22, 2023
2 parents 676c8e7 + d727c3a commit f37e0ad
Showing 3 changed files with 45 additions and 21 deletions.
6 changes: 4 additions & 2 deletions README.md
@@ -25,6 +25,7 @@ The purpose of this package is to offer a convenient question-answering system w
* Supports the "Retrieve and Re-rank" strategy for semantic search, see - https://www.sbert.net/examples/applications/retrieve_rerank/README.html.

* Supports HyDE (Hypothetical Document Embeddings) - https://arxiv.org/pdf/2212.10496.pdf
* WARNING: Enabling HyDE can significantly alter the quality of the results. Please make sure to read the paper before enabling it (a minimal sketch of the idea appears at the end of this excerpt).

* Allows interaction with embedded documents, supporting the following models and methods (including locally hosted):
* OpenAI models (ChatGPT 3.5/4 and Azure OpenAI).
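
For readers unfamiliar with HyDE, below is a minimal, hypothetical sketch of the idea only (not this package's implementation): the raw query is replaced by an LLM-generated "hypothetical document" before embedding, which is why it costs an extra LLM call and can shift result quality either way. The `generate_hypothetical_answer` stub and the sentence-transformers model are illustrative assumptions.

```python
from sentence_transformers import SentenceTransformer, util

def generate_hypothetical_answer(question: str) -> str:
    """Stand-in for an LLM call that writes a plausible (possibly wrong) answer."""
    return "A hypothetical passage answering: " + question

embedder = SentenceTransformer("all-MiniLM-L6-v2")
documents = [
    "To load a GGUF model, point model_path at the downloaded file.",
    "OpenAI models require an API key in the environment.",
]
doc_embeddings = embedder.encode(documents, convert_to_tensor=True)

question = "How do I load a locally hosted model?"
hyde_text = generate_hypothetical_answer(question)  # HyDE: embed this instead of the raw question
query_embedding = embedder.encode([hyde_text], convert_to_tensor=True)

print(util.semantic_search(query_embedding, doc_embeddings, top_k=1))
```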
@@ -83,7 +84,8 @@ pip install . # or `pip install -e .` for development

To create a configuration file in YAML format, you can refer to the example template provided in `sample_templates/config_template.yaml`.

The sample configuration file specifies how to load one of the supported locally hosted models, downloaded from Huggingface - https://huggingface.co/TheBloke/wizardLM-13B-1.0-GGML/resolve/main/WizardLM-13B-1.0.ggmlv3.q5_K_S.bin
The sample configuration file specifies how to load one of the supported locally hosted models, downloaded from Huggingface -
https://huggingface.co/TheBloke/airoboros-l2-13B-gpt4-1.4.1-GGUF/resolve/main/airoboros-l2-13b-gpt4-1.4.1.Q4_K_M.gguf
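
As an illustration only (assuming the optional `huggingface_hub` package is installed), the file above could also be fetched programmatically; the sketch below is not part of this package:

```python
from huggingface_hub import hf_hub_download

# Downloads the GGUF file into the local Hugging Face cache and returns its path
model_path = hf_hub_download(
    repo_id="TheBloke/airoboros-l2-13B-gpt4-1.4.1-GGUF",
    filename="airoboros-l2-13b-gpt4-1.4.1.Q4_K_M.gguf",
)
print(model_path)  # point model_path in the YAML config at this location
```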

As an alternative, uncomment the llm section for the OpenAI model.

@@ -130,7 +132,7 @@ llmsearch interact llm -c /path/to/config.yaml

Based on the example configuration provided in the sample configuration file, the following actions will take place:

- The system will load a quantized GGML model using the LlamaCpp framework. The model file is located at `/storage/llm/cache/WizardLM-13B-1.0-GGML/WizardLM-13B-1.0.ggmlv3.q5_K_S.bin`.
- The system will load a quantized GGUF model using the LlamaCpp framework. The model file is located at `/storage/llm/cache/airoboros-l2-13b-gpt4-1.4.1.Q4_K_M.gguf`.
- The model will be partially loaded into the GPU (30 layers) and partially into the CPU (remaining layers). The `n_gpu_layers` parameter can be adjusted according to the hardware limitations; a minimal loading sketch is shown after this list.
- Additional LlamaCpp specific parameters specified in `model_kwargs` from the `llm->params` section will be passed to the model.
- The system will query the embeddings database using a hybrid search algorithm that combines sparse and dense embeddings. It will provide the most relevant context from different documents, up to a maximum context size of 4096 characters (`max_char_size` in `semantic_search`).
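
For illustration, a roughly equivalent direct call with `llama-cpp-python` is sketched below, assuming the path and parameters from the sample config; this is not the package's internal loading code:

```python
from llama_cpp import Llama

llm = Llama(
    model_path="/storage/llm/cache/airoboros-l2-13b-gpt4-1.4.1.Q4_K_M.gguf",
    n_gpu_layers=30,  # layers offloaded to the GPU; lower this on smaller GPUs
    n_ctx=4096,       # context window size
)

# A single completion using an instruction-style prompt similar to the config template
out = llm("### Instruction:\nSay hello.\n\n### Response:\n", max_tokens=32)
print(out["choices"][0]["text"])
```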
2 changes: 1 addition & 1 deletion sample_templates/config_template.yaml
@@ -69,7 +69,7 @@ persist_response_db_path: "/path/to/responses.db" # optional sqlite database fi
llm:
type: llamacpp
params:
model_path: /storage/llm/cache/WizardLM-13B-1.0-GGML/WizardLM-13B-1.0.ggmlv3.q5_K_S.bin
model_path: /storage/llm/cache/airoboros-l2-13b-gpt4-1.4.1.Q4_K_M.gguf
prompt_template: |
### Instruction:
Use the following pieces of context to answer the question at the end. If the answer isn't in the context, say that you don't know; don't try to make up an answer.
58 changes: 40 additions & 18 deletions src/llmsearch/webapp.py
@@ -54,7 +54,7 @@ def get_bundle(config):


@st.cache_data
def generate_response(question: str, _config: Config, _bundle, label_filter: str = ""):
def generate_response(question: str, use_hyde: bool, _config: Config, _bundle, label_filter: str = ""):
# _config and _bundle are underscore-prefixed so these parameters aren't hashed by st.cache_data
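# use_hyde, by contrast, is part of the cache key, presumably so that toggling HyDE re-runs the query instead of returning a stale cached response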

output = get_and_parse_response(query=question, config=_config, llm_bundle=_bundle, label=label_filter)
@@ -68,9 +68,11 @@ def generate_response(question: str, _config: Config, _bundle, label_filter: str
if args.cli_config_path:
config_file = args.cli_config_path
else:
config_file = st.sidebar.file_uploader(
"Select template to load", type=["yml", "yaml"]
)
config_file = st.sidebar.file_uploader("Select template to load", type=["yml", "yaml"])

# Initialize state for historical results
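# (st.session_state persists across Streamlit reruns, so the history survives each new question within a session)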
if "messages" not in st.session_state:
st.session_state["messages"] = []


if config_file is not None:
@@ -82,29 +84,48 @@ def generate_response(question: str, _config: Config, _bundle, label_filter: str

st.sidebar.write(f"**Model type:** {config.llm.type}")

st.sidebar.write(
f"**Document path**: {config.embeddings.document_settings[0].doc_path}"
)
st.sidebar.write(f"**Document path**: {config.embeddings.document_settings[0].doc_path}")
st.sidebar.write(f"**Embedding path:** {config.embeddings.embeddings_path}")
st.sidebar.write(
f"**Max char size (semantic search):** {config.semantic_search.max_char_size}"
)
st.sidebar.write(f"**Max char size (semantic search):** {config.semantic_search.max_char_size}")
label_filter = ""
if config.embeddings.labels:
label_filter = st.sidebar.selectbox(label="Filter by label", options = ["-"] + config.embeddings.labels)
if label_filter is None or label_filter == '-':
label_filter = st.sidebar.selectbox(label="Filter by label", options=["-"] + config.embeddings.labels)
if label_filter is None or label_filter == "-":
label_filter = ""

llm_bundle = get_bundle(config)

text = st.chat_input("Enter text")
is_hyde = st.sidebar.checkbox(label = "Use HyDE (cost: 2 api calls)", value=llm_bundle.hyde_enabled)
is_hyde = st.sidebar.checkbox(label="Use HyDE (cost: 2 api calls)", value=llm_bundle.hyde_enabled)

if text:

# Dynamically switch HyDE
llm_bundle.hyde_enabled = is_hyde
output = generate_response(question=text, _bundle=llm_bundle, _config=config, label_filter = label_filter)
output = generate_response(
question=text,
use_hyde=llm_bundle.hyde_enabled,
_bundle=llm_bundle,
_config=config,
label_filter=label_filter,
)

# Add assistant response to chat history
st.session_state["messages"].append(
{
"question": text,
"response": output.response,
"links": [f'<a href="{s.chunk_link}">{s.chunk_link}</a>' for s in output.semantic_search[::-1]],
"quality": f"{output.average_score:.2f}",
}
)
for h_response in st.session_state["messages"]:
with st.expander(label=f":question: **{h_response['question']}**", expanded=False):
st.markdown(f"##### {h_response['question']}")
st.write(h_response["response"])
st.markdown(f"\n---\n##### Serrch Quality Score: {h_response['quality']}")
st.markdown("##### Links")
for link in h_response["links"]:
st.write("\t* " + link, unsafe_allow_html=True)

for source in output.semantic_search[::-1]:
source_path = source.metadata.pop("source")
@@ -119,12 +140,13 @@ def generate_response(question: str, _config: Config, _bundle, label_filter: str

st.text(f"\n\n{source.chunk_text}")
if llm_bundle.hyde_enabled:
with st.expander(label=':octagonal_sign: **HyDE Response**', expanded=False):
with st.expander(label=":octagonal_sign: **HyDE Response**", expanded=False):
st.write(output.question)

with chat_message("assistant"):
st.write(f"**Search results quality score: {output.average_score:.2f}**\n")
st.write(output.response)
st.write(output.response)  # show the most recent response in the chat area as well


else:
st.info("Choose a configuration template to start...")
