neo4j_vectorization_txt
import os
from dotenv import load_dotenv
load_dotenv()
from langchain_community.document_loaders import DirectoryLoader, TextLoader
COURSES_PATH = "llm-vectors-unstructured/data/asciidoc"
# Load lesson documents
loader = DirectoryLoader(COURSES_PATH, glob="**/lesson.adoc", loader_cls=TextLoader)
docs = loader.load()
# Create a text splitter
# text_splitter =
# Split the documents into chunks
# chunks =
# Create a Neo4j vector store from the chunks
# neo4j_db =
# (the completed versions of these steps are in the next section)
------------------------------------------------------------------------------------------------------------- create embeddings with Neo4jVector
import os
from dotenv import load_dotenv
load_dotenv()
from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.vectorstores.neo4j_vector import Neo4jVector
from langchain_openai import OpenAIEmbeddings
COURSES_PATH = "llm-vectors-unstructured/data/asciidoc"
loader = DirectoryLoader(COURSES_PATH, glob="**/lesson.adoc", loader_cls=TextLoader)
docs = loader.load()
# Split on paragraph breaks into ~1500-character chunks with 200-character overlap
text_splitter = CharacterTextSplitter(
    separator="\n\n",
    chunk_size=1500,
    chunk_overlap=200,
)
chunks = text_splitter.split_documents(docs)
print(chunks)
# Embed the chunks and store them in Neo4j as Chunk nodes,
# indexed by the chunkVector vector index
neo4j_db = Neo4jVector.from_documents(
    chunks,
    OpenAIEmbeddings(openai_api_key=os.getenv('OPENAI_API_KEY')),
    url=os.getenv('NEO4J_URI'),
    username=os.getenv('NEO4J_USERNAME'),
    password=os.getenv('NEO4J_PASSWORD'),
    database="neo4j",
    index_name="chunkVector",
    node_label="Chunk",
    text_node_property="text",
    embedding_node_property="embedding",
)
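Not part of the original notes: a minimal sketch of querying the new store directly through the LangChain vector store interface instead of Cypher. It assumes the neo4j_db object created above and that the embeddings have already been written.
# Sketch: similarity search via the LangChain vector store interface
# (assumes neo4j_db from the block above)
results = neo4j_db.similarity_search_with_score("What does Hallucination mean?", k=6)
for chunk, score in results:
    print(score, chunk.page_content[:120])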
// Verify the import: inspect a sample of the Chunk nodes
MATCH (c:Chunk) RETURN c LIMIT 25
// Embed the question with Neo4j's GenAI functions and retrieve the 6 most similar chunks
WITH genai.vector.encode(
    "What does Hallucination mean?",
    "OpenAI",
    { token: "sk-..." }) AS userEmbedding
CALL db.index.vector.queryNodes('chunkVector', 6, userEmbedding)
YIELD node, score
RETURN node.text, score
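Note: genai.vector.encode() relies on Neo4j's GenAI functions being available on the server. If they are not, the next section produces the same result by creating the query embedding client-side with the OpenAI API and passing it to db.index.vector.queryNodes as a parameter.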
------------------------------------------------------------------------------------------------------------------------- create a query embedding with the OpenAI client and query the index
import os
from dotenv import load_dotenv
load_dotenv()
from openai import OpenAI
from langchain_community.graphs import Neo4jGraph
# OpenAI API client, used here only to create the query embedding
llm = OpenAI(api_key=os.getenv('OPENAI_API_KEY'))

# Create an embedding for the question
response = llm.embeddings.create(
    input="What does Hallucination mean?",
    model="text-embedding-ada-002"
)
embedding = response.data[0].embedding

# Connect to Neo4j and query the chunkVector index with the embedding
graph = Neo4jGraph(
    url=os.getenv('NEO4J_URI'),
    username=os.getenv('NEO4J_USERNAME'),
    password=os.getenv('NEO4J_PASSWORD')
)
result = graph.query("""
CALL db.index.vector.queryNodes('chunkVector', 6, $embedding)
YIELD node, score
RETURN node.text, score
""", {"embedding": embedding})

for row in result:
    print(row['node.text'], row['score'])
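An alternative sketch, not part of the original notes: the same retrieval can be run entirely through LangChain by attaching a Neo4jVector to the existing chunkVector index. It assumes the index created earlier and the same environment variables.
import os
from dotenv import load_dotenv
load_dotenv()
from langchain_community.vectorstores.neo4j_vector import Neo4jVector
from langchain_openai import OpenAIEmbeddings

# Attach to the existing chunkVector index rather than re-creating it
chunk_store = Neo4jVector.from_existing_index(
    OpenAIEmbeddings(openai_api_key=os.getenv('OPENAI_API_KEY')),
    url=os.getenv('NEO4J_URI'),
    username=os.getenv('NEO4J_USERNAME'),
    password=os.getenv('NEO4J_PASSWORD'),
    index_name="chunkVector",
)

for chunk, score in chunk_store.similarity_search_with_score("What does Hallucination mean?", k=6):
    print(chunk.page_content, score)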