-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathqa.py
199 lines (141 loc) · 10.7 KB
/
qa.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
import ast
import os
import pickle
import time

import openai
from dotenv import load_dotenv

from utils.kg_gen import *
from KnowledgeGraph import *
from utils.gpt import gpt_chat
# Load variables from a .env file (python-dotenv) before reading the key, then
# give the OpenAI client whatever is stored under PROF_OPENAI_API_KEY
# (None when the variable is not set).
load_dotenv()
openai.api_key = os.environ.get("PROF_OPENAI_API_KEY")
# Model passed explicitly to every classification / rewriting call. The calls
# that phrase the final answer are made without a model argument, so they use
# whatever gpt_chat defaults to.
_REASONING_MODEL = "gpt-4-1106-preview"

# System prompt shared by every "phrase the final answer" call.
_ANSWER_SYSTEM_PROMPT = (
    "You will get a question about an entity and the answer in knowledge graph, "
    "based on that, phrase a final answer to the question"
)


def _chat(system_prompt, user_content, model=None):
    """Send one system + user message pair through gpt_chat and return its reply."""
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_content},
    ]
    if model is None:
        return gpt_chat(messages)
    return gpt_chat(messages, model=model)


def _phrase_final_answer(question, kg_answer):
    """Ask the model to word `kg_answer` (already a string) as an answer to `question`."""
    return _chat(
        _ANSWER_SYSTEM_PROMPT,
        f"Question: {question}\nAnswer in knowledge graph: {kg_answer}\n",
    )


def _bracketed(items):
    """Return the str() of every item concatenated (no separator) inside "[...]"."""
    return "[" + "".join(str(item) for item in items) + "]"


def _extract_entity_pair(question):
    """Extract the entities of `question`, insisting on exactly two of them."""
    entities = entity_extract(question)
    if len(entities) != 2:
        raise ValueError(
            f"expected exactly 2 entities in the question, found {len(entities)}"
        )
    return entities


def _answer_entity_attribute(question, knowledge_graph):
    """Answer a question about the attributes of a single entity."""
    entity_list = entity_extract(question)
    if len(entity_list) != 1:
        raise ValueError(
            f"expected exactly 1 entity in the question, found {len(entity_list)}"
        )
    target_entity = knowledge_graph.find_entity(entity_list[0].name)
    if target_entity is None:
        raise LookupError("entity in the question doesn't exist in the knowledge graph")
    data_properties = target_entity.data_properties
    print(data_properties)
    return _phrase_final_answer(question, str(data_properties))


def _answer_relation_attribute(question, knowledge_graph):
    """Answer a question about the attributes of a relation between two entities."""
    entities = _extract_entity_pair(question)
    triplet_res = _chat(
        "You are an expert in linguistics and knowledge graph. You are given two entities, "
        "and a text chunk. You help extract relations between the two entities. "
        "Do not add any external information outside of the text to the relations. "
        "Your output should be a triplet in this list format: "
        "['Head_entity', 'relation', 'Tail_entity']",
        f"Entity 1: {entities[0].name}\nEntity 2: {entities[1].name}\nText: {question}",
        model=_REASONING_MODEL,
    )
    print(triplet_res)
    # literal_eval only accepts Python literals; unpacking additionally rejects
    # replies that are not a 3-element sequence (raises ValueError).
    head, predicate, tail = ast.literal_eval(triplet_res)
    relation_name = f'{predicate.replace(" ", "_")}_Relation'
    relation = knowledge_graph.find_relation(head, tail, relation_name)
    if relation is None:
        raise LookupError("relation in the question doesn't exist in the knowledge graph")
    data_properties = relation.data_properties
    print(data_properties)
    return _phrase_final_answer(question, str(data_properties))


def _answer_relation_question(question, knowledge_graph):
    """Answer a question about how two entities are related, via a graph path."""
    entities = _extract_entity_pair(question)
    start_entity = knowledge_graph.find_entity(entities[0].name)
    end_entity = knowledge_graph.find_entity(entities[1].name)
    if start_entity is None or end_entity is None:
        raise LookupError("One of the entities in the question doesn't exist in knowledge graph.")
    path = knowledge_graph.find_path(start_entity, end_entity)
    path_str = _bracketed(path)
    print(path_str)
    return _phrase_final_answer(question, path_str)


def _answer_entity_question(question, knowledge_graph):
    """Answer a question whose answer is one or more entities, or a count of them.

    The question is rewritten into a statement containing an [ENTITY]
    placeholder, a small subgraph is built from that statement, and the
    knowledge graph is searched for entities that match the subgraph.
    """
    entity_or_number_res = _chat(
        "You are an expert in linguistics and knowledge graph. You will be given a question, "
        "output whether this question is about an entity/entities, or number of entity/entities",
        question,
        model=_REASONING_MODEL,
    )
    is_number_question = 'number' in entity_or_number_res.lower()

    # For "how many ..." questions, first turn the question into a "who/what"
    # question. The rewritten question must be the input of the next step;
    # previously the original question was sent again and this rewrite was lost.
    base_question = question
    if is_number_question:
        base_question = _chat(
            "You will be given a question of finding the number of something, "
            "convert this question to \"Who\" or \"What\" type of question.\n\n"
            "Example:\n\"How many presidents were there between 2010-2020?\" "
            "Should be converted to \"Who are presidents between 2010-2020?\"",
            question,
            model=_REASONING_MODEL,
        )

    question_modified = _chat(
        "If the question is about multiple entities, convert it to a question about single entity, "
        "else output the same question. don't change content of question.\n\n"
        "Example: \n\"Where are all the restaurants in this town?\" should be converted to "
        "\"Where is the restaurant in this town?\"\n\n"
        "\"Who is the CEO of Meta\" should be the same.",
        base_question,
        model=_REASONING_MODEL,
    )
    print(question_modified)

    # NOTE(review): the instruction says "[ENTITY]" but the example shows
    # "[Entity]", while the lookup below uses the exact key "[ENTITY]".
    # Confirm entity_extract normalises the placeholder before changing the prompt.
    text = _chat(
        "You will be given a question related to an entity, convert the question into a statement "
        "and replace the entity asked with [ENTITY]\n\n"
        "For example: question \"Who is the Chancellor of UIUC at 2015-2016?\" "
        "should be convert to \"[Entity] is the Chancellor of UIUC at 2015-2016.\"",
        question_modified,
        model=_REASONING_MODEL,
    )
    print(text)

    # Build a throw-away subgraph describing the statement.
    subgraph = KnowledgeGraph(entities=dict(), relations=set(), types=set(), vdb_path='./subgraph_vdb')
    entities = entity_extract(text, entity_question=True)
    for entity in entities:
        subgraph.add_entity(entity)
    relations = predicate_extract(text=text, entities=entities, entity_question=True)
    for relation in relations:
        subgraph.add_relation(relation)
    print(subgraph)
    # Persist the subgraph next to the main graph's pickle.
    with open('./kg_save/subgraph.pkl', 'wb') as file:
        pickle.dump(subgraph, file)

    # Intersect the candidates found from every known entity of the statement.
    final_entities = None
    for entity_name, entity in subgraph.entities.items():
        if entity_name == "[ENTITY]":
            continue
        path = subgraph.find_path(entity, subgraph.entities["[ENTITY]"])
        result_entities = knowledge_graph.find_matching_entities(
            relations=path, subgraph=subgraph, question=question_modified
        )
        print(result_entities)
        if len(result_entities) == 0:
            final_entities = set()
        elif final_entities is None:
            final_entities = result_entities
        else:
            final_entities = final_entities & result_entities
    if final_entities is None:
        # The subgraph had no entity besides the placeholder, so nothing was
        # searched; treat that as "no match" instead of failing on None below.
        final_entities = set()

    entities_str = _bracketed(final_entities)
    print(entities_str)
    kg_answer = str(len(final_entities)) if is_number_question else entities_str
    return _phrase_final_answer(question, kg_answer)


def kg_qa(question: str, knowledge_graph: KnowledgeGraph):
    """Answer `question` using `knowledge_graph` and GPT-based classification.

    The model first classifies the expected answer as an attribute, a
    relation or an entity (checked in that order, case-insensitively). The
    question is then routed to the matching strategy, and the raw
    knowledge-graph result is handed back to the model to be phrased as a
    final answer. If none of the three categories is recognised, the question
    is sent to the model directly.

    Args:
        question: The natural-language question to answer.
        knowledge_graph: The graph to look entities, relations and paths up in.

    Returns:
        Whatever gpt_chat returns for the final phrasing call.

    Raises:
        ValueError: The question does not contain the number of entities the
            chosen strategy needs, or the model's triplet reply cannot be
            parsed into three elements.
        SyntaxError: The model's triplet reply is not a valid Python literal.
        LookupError: An entity or relation named in the question is not in
            the knowledge graph.
    """
    question_type_res = _chat(
        "You are an expert in linguistics and knowledge graph. You will be given a question, "
        "output whether the answer to this question would be an distinct entity, "
        "relationship between two entities, or attributes of an entity/relationship.",
        f"Question: {question}",
        model=_REASONING_MODEL,
    )
    print(question_type_res)
    question_type = question_type_res.lower()

    if "attribut" in question_type:
        # Decide whether the attribute belongs to an entity or to a relation.
        entity_or_relation_res = _chat(
            " You are an expert in linguistics and knowledge graph. You will be given a question, "
            "output whether the answer to this question would be attributes of an entity, "
            "or attributes a relation between two entities.",
            question,
            model=_REASONING_MODEL,
        )
        print(entity_or_relation_res)
        # Both possible replies mention "entit", so only "relation" tells them
        # apart. The two branches used to be swapped.
        if "relation" in entity_or_relation_res.lower():
            return _answer_relation_attribute(question, knowledge_graph)
        return _answer_entity_attribute(question, knowledge_graph)

    if "relation" in question_type:
        return _answer_relation_question(question, knowledge_graph)

    if "entit" in question_type:
        return _answer_entity_question(question, knowledge_graph)

    # Unrecognised category: let the model answer without the knowledge graph.
    return _chat("", question, model=_REASONING_MODEL)
if __name__ == '__main__':
    # Demo run: split one question into sub-questions the engine supports,
    # answer each against the pickled knowledge graph, then let the model
    # combine the partial answers. Wall-clock time is reported at the end.
    started_at = time.time()
    question = "Who is the Chancellor of UIUC from 2015-2016?"

    # Load the knowledge graph saved by an earlier run.
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open('./kg_save/knowledge_graph.pkl', 'rb') as kg_file:
        knowledge_graph: KnowledgeGraph = pickle.load(kg_file)

    decomposition_prompt = (
        "I have a QA engine based on knowledge graph. It only accepts questions about the name of "
        "one or more entities given other information, relation between two entities, "
        "attributes of an entity, and attributes of a relation. \n\n"
        "You will be given a question. Can you list all the questions that my QA engine accepts "
        "and that combining answers to them gives answer to this question?\n\n"
        "Your output should be in this list format: [\"question1\", \"question2\", ...]"
    )
    questions_list_res = gpt_chat(
        [
            {"role": "system", "content": decomposition_prompt},
            {"role": "user", "content": question},
        ],
        model="gpt-4-1106-preview",
    )
    # The reply is cleaned by format_list_answer and parsed as a Python literal.
    questions: list = ast.literal_eval(format_list_answer(questions_list_res))

    # Answer the sub-questions in order and collect one prompt section each.
    sections = []
    for number, sub_question in enumerate(questions, start=1):
        sub_answer = kg_qa(sub_question, knowledge_graph)
        sections.append(f"Question {number}: {sub_question}\nAnswer: {sub_answer}\n\n")
    closing = f"Given answers to those questions, provide an answer to this final question:\n{question}"
    final_prompt = "".join(sections) + closing
    print("Final prompt: ", final_prompt)

    final_answer = gpt_chat(
        [
            {"role": "system", "content": ""},
            {"role": "user", "content": final_prompt},
        ],
        model="gpt-4-1106-preview",
    )
    print("Final Answer: ", final_answer)

    finished_at = time.time()
    print(f"Time elapsed: {finished_at - started_at} seconds")