-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
127 lines (107 loc) · 4.72 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import streamlit as st
from QASystem import QASystem
from openai_utils import init_embeddings_object
from chroma_vector_store import ChromaSearch
from pdf_utils import load_document_pages, split_document_into_chunks
from pinecone_vector_store import PineconeSearch
from formatter import format_docs
from dotenv import load_dotenv
# Load settings from a .env file (python-dotenv) before any Streamlit call is made.
load_dotenv()

# Descriptive blurb rendered under the page title. The fragments are joined by
# implicit string concatenation, so spacing inside each literal is significant.
_APP_CAPTION = (
    "PDFQuest is an AI-powered PDF QA tool that enables you to easily upload and analyze PDF documents"
    " with advanced question-answering capabilities. "
    " PDFQuest goes beyond traditional keyword-based search "
    " and utilizes vector embeddings and semantic search "
    "to deliver precise and efficient results"
    " from your PDFs to improve your workflow efficiency."
)

# Page chrome: browser-tab metadata first, then the visible header area.
st.set_page_config(
    page_title="PDF QA Tool",
    page_icon=":magic_wand:",
    layout="centered",
)
st.title(" :magic_wand: PDFQuest: PDF Question-Answering Tool")
st.caption(_APP_CAPTION)
st.divider()
def _set_run_qa(active):
    """Record in session state whether the question-answering section is shown."""
    st.session_state.run_qa = active


def run_app():
    """Render one pass of the app: upload, indexing, then the optional QA section.

    The ``run_qa`` session-state flag decides whether the "Run
    Question-Answering Task" button or the QA section is rendered. Both
    buttons change the flag through ``on_click`` callbacks rather than by
    inspecting the button's return value: Streamlit executes callbacks before
    it reruns the script, so the new flag value is already in place when the
    branch below is chosen and the page switches on the first click.
    """
    uploader = PDFUploader()
    # Load PDF file
    uploader.load_pdf()
    # Upload the documents to cloud/local session
    uploader.upload()

    if "run_qa" not in st.session_state:
        st.session_state.run_qa = False

    if not st.session_state.run_qa:
        st.button(
            "Run Question-Answering Task",
            on_click=_set_run_qa,
            args=(True,),
        )
        return

    uploader.run_qa()
    if uploader.show_qa:
        uploader.display_result()
    st.button(
        "Clear",
        key="qa_clear_button",
        on_click=_set_run_qa,
        args=(False,),
    )
class PDFUploader:
    """Drive the PDF workflow for one script run: upload, index, ask, display.

    Each step is a method that renders its own Streamlit widgets and stores
    its outcome on the instance, so later steps can check whether the earlier
    ones produced anything.
    """

    def __init__(self):
        """Initialise empty workflow state and read the feature flags from secrets."""
        self.file_path = None      # value returned by st.file_uploader
        self.pages = None          # output of load_document_pages
        self.texts = None          # output of split_document_into_chunks
        self.file_key = "test"
        self.question = None       # text typed into the question box
        self.result = None         # answer mapping returned by QASystem
        self.vector_store = None   # store returned by push_documents
        self.embeddings = None     # object returned by init_embeddings_object
        # Flags are parsed leniently: a TOML boolean (true) and any casing of
        # the string "true" enable the flag, and a missing key means "off".
        self.use_pinecone = self._read_flag("USE_PINECONE")
        self.run_qa_with_source = self._read_flag("QA_WITH_SOURCE")
        self.show_qa = False

    @staticmethod
    def _as_flag(value):
        """Return True when *value* is boolean true or the text "true" in any case.

        Surrounding whitespace is ignored; every other value yields False.
        """
        return str(value).strip().lower() == "true"

    @classmethod
    def _read_flag(cls, name, default=False):
        """Read the secret *name* as a boolean, using *default* if it is not set."""
        try:
            raw = st.secrets[name]
        except KeyError:
            raw = default
        return cls._as_flag(raw)

    def load_pdf(self):
        """Render the file uploader and, once a PDF is supplied, load and chunk it.

        Fills ``self.pages`` and ``self.texts``; shows an error instead when
        the uploaded file does not report the PDF MIME type.
        """
        self.file_path = st.file_uploader("Upload PDF", type="pdf")
        if self.file_path:
            if self.file_path.type == "application/pdf":
                print("PDF uploaded successfully.")
                st.write(":white_check_mark: PDF uploaded successfully.")
                self.pages = load_document_pages(self.file_path)
                self.texts = split_document_into_chunks(self.pages)
            else:
                st.error("Please upload a PDF file.")

    def upload(self):
        """Push the chunked text into the configured vector store.

        Does nothing when ``load_pdf`` produced no chunks. On success the
        store returned by the search strategy is kept in ``self.vector_store``.
        """
        if self.texts:
            self.embeddings = init_embeddings_object()
            search_strategy = self.get_search_strategy()
            self.vector_store = search_strategy.push_documents(self.texts, self.embeddings)
            st.write(":white_check_mark: PDF text uploaded to vector store.")

    def get_search_strategy(self):
        """Return a PineconeSearch when ``use_pinecone`` is set, else a ChromaSearch."""
        if self.use_pinecone:
            print("Using Pinecone search strategy")
            search_strategy = PineconeSearch()
        else:
            print("Using Chroma search strategy")
            search_strategy = ChromaSearch()
        return search_strategy

    def run_qa(self):
        """Render the question box and run the QA system when "Ask" is pressed.

        Needs a populated ``self.vector_store``; otherwise a warning is shown.
        After a query, ``self.result`` holds the answer and ``self.show_qa``
        is set so the caller knows there is something to display.
        """
        if self.vector_store:
            qa_system = QASystem()
            self.question = st.text_input("Enter your question:", key='textbox', placeholder="Enter your question here")
            if st.button("Ask"):
                print(f"use source? {self.run_qa_with_source}")
                if self.run_qa_with_source:
                    self.result = qa_system.retrieve_document(self.vector_store, self.question)
                else:
                    self.result = qa_system.qa_without_sources(self.vector_store, self.question)
                self.show_qa = True
        else:
            st.warning("Please upload a PDF file and click on Ask button.")

    @staticmethod
    def clear_callback():
        """Empty the question box by resetting its session-state entry."""
        st.session_state['textbox'] = ''

    def display_result(self):
        """Show the question, the answer and, when enabled, the source documents.

        Warns instead when no result is available. The "Clear" button empties
        the question box through ``clear_callback`` and resets the result
        fields on this instance.
        """
        if self.result:
            st.write(f":question: Question: {self.question}")
            st.write(f":zap: Answer: {self.result['result']}")
            if self.run_qa_with_source:
                with st.expander("Show Source"):
                    st.write("The relevant source documents are:")
                    source = format_docs(self.result['source_documents'])
                    st.write(f"Source: {source}")
            if st.button("Clear", on_click=self.clear_callback):
                self.question = None
                self.result = None
                self.show_qa = False
        else:
            st.warning("Please run the question-answering task first.")
# Start the app only when this file is executed as the entry script.
if "__main__" == __name__:
    run_app()