-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
74 lines (56 loc) · 2.04 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import os
import cassio
from PyPDF2 import PdfReader
from dotenv import load_dotenv
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
from langchain_community.vectorstores.cassandra import Cassandra
from langchain_openai import OpenAIEmbeddings, OpenAI
from langchain_text_splitters import CharacterTextSplitter
# --- Configuration and client setup ---------------------------------------
# Load variables from a local .env file (if present) into the process
# environment before the credentials are read below.
load_dotenv()


def _require_env(name):
    """Return the value of the environment variable *name*.

    Args:
        name: Name of the environment variable to read.

    Returns:
        The variable's non-empty string value.

    Raises:
        RuntimeError: If the variable is unset or empty. Failing here names
            the missing setting explicitly instead of handing ``None`` on to
            the client constructors below.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(
            f"Missing required environment variable {name!r}; "
            "set it in the environment or in a .env file."
        )
    return value


ASTRA_DB_APPLICATION_TOKEN = _require_env("ASTRA_DB_APPLICATION_TOKEN")
ASTRA_DB_ID = _require_env("ASTRA_DB_ID")
OPENAI_API_KEY = _require_env("OPENAI_API_KEY")

# The LLM and the embedding client are both configured with the same key.
llm = OpenAI(openai_api_key=OPENAI_API_KEY)
embedding = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)

# cassio.init() runs before the vector store is constructed.
# NOTE(review): session=None / keyspace=None presumably make the store fall
# back to the connection registered by cassio.init() -- confirm against the
# cassio / langchain documentation.
cassio.init(token=ASTRA_DB_APPLICATION_TOKEN, database_id=ASTRA_DB_ID)
astra_vector_store = Cassandra(
    embedding=embedding,
    table_name="qa_mini_demo",
    session=None,
    keyspace=None,
)
# --- PDF loading and chunking ----------------------------------------------
# Splitter settings: split on newlines, measure chunk length with len(),
# with chunk_size=800 and chunk_overlap=200.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=800,
    chunk_overlap=200,
    length_function=len,
)

pdfreader = PdfReader('attention.pdf')

# Concatenate the extracted text of every page, skipping pages for which
# extract_text() yields nothing (None or an empty string). A single join
# replaces repeated "+=" on a growing string, and the page index that the
# previous enumerate() produced was never used.
page_contents = (page.extract_text() for page in pdfreader.pages)
raw_text = ''.join(content for content in page_contents if content)

texts = text_splitter.split_text(raw_text)
# Number of chunks produced by the splitter.
print(len(texts))
# For testing purposes only the first 50 chunks are stored.
# NOTE(review): no explicit ids are passed to add_texts(), so re-running the
# script presumably inserts the same chunks again -- confirm and consider
# passing deterministic ids if duplicates are a problem.
sample_texts = texts[:50]
astra_vector_store.add_texts(sample_texts)
# The inserted items are PDF text chunks, so the message says so.
print("Inserted %i text chunks." % len(sample_texts))

# Wrap the store so it can be queried together with an LLM.
astra_vector_index = VectorStoreIndexWrapper(vectorstore=astra_vector_store)
# --- Interactive question/answer loop --------------------------------------
# Repeatedly prompt for a question, print the LLM answer obtained through the
# vector index, then list the four most similar stored chunks with their
# scores. Typing "quit" (any letter case) or closing stdin ends the loop;
# blank input simply re-prompts.
first_question = True
while True:
    # The wording of the prompt changes once a question has been asked.
    prompt = (
        "\nEnter your question (or type 'quit' to exit): "
        if first_question
        else "\nWhat's your next question (or type 'quit' to exit): "
    )
    try:
        query_text = input(prompt).strip()
    except EOFError:
        # stdin was closed (Ctrl-D or exhausted piped input): treat it like
        # "quit" instead of ending the program with a traceback.
        break

    if query_text.lower() == "quit":
        break
    if not query_text:
        continue

    first_question = False

    print(f'\nQUESTION: "{query_text}"')
    answer = astra_vector_index.query(query_text, llm=llm).strip()
    print(f'ANSWER: "{answer}"\n')

    print("FIRST DOCUMENTS BY RELEVANCE:")
    matches = astra_vector_store.similarity_search_with_score(query_text, k=4)
    for doc, score in matches:
        # Only the first 84 characters of each chunk are shown as a preview.
        print(f' [{score:0.4f}] "{doc.page_content[:84]} ..."')