-
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
9 changed files
with
167 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
import os | ||
import re | ||
|
||
def parse_prolog_file(file_path):
    """Read a Prolog source file and extract its ground facts.

    Returns a pair ``(unary, binary)`` where ``unary`` maps each arity-1
    predicate name to the set of atoms asserted for it, and ``binary`` is a
    list of ``(predicate, arg1, arg2)`` tuples, one per arity-2 fact, in
    file order.
    """
    with open(file_path, 'r') as src:
        text = src.read()

    unary = {}
    binary = []

    # Facts of the form pred(atom).
    for name, atom in re.findall(r'(\w+)\((\w+)\)\.', text):
        unary.setdefault(name, set()).add(atom)

    # Facts of the form pred(atom1, atom2).
    for name, first, second in re.findall(r'(\w+)\((\w+),\s*(\w+)\)\.', text):
        binary.append((name, first, second))

    return unary, binary
def create_folders_and_files(arity_1_predicates, arity_2_predicates, output_dir):
    """Materialise parsed Prolog facts as an Obsidian-style folder/Markdown tree.

    For every arity-1 predicate a folder ``output_dir/<predicate>`` is created
    containing a ``<predicate>.md`` Waypoint marker file plus one
    ``<param>.md`` note per asserted parameter.  Every arity-2 fact
    ``pred(a, b)`` is appended to ``a``'s note as a link line ``pred::[[b]]``;
    when ``a`` belongs to no arity-1 predicate the note goes under
    ``uncategorized/`` (with a warning printed).

    Args:
        arity_1_predicates: mapping of predicate name -> set of parameters.
        arity_2_predicates: list of (predicate, param1, param2) tuples.
        output_dir: root directory for the generated tree.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Create folders and files for arity 1 predicates.
    for predicate, parameters in arity_1_predicates.items():
        predicate_dir = os.path.join(output_dir, predicate)
        os.makedirs(predicate_dir, exist_ok=True)
        pred_file_path = os.path.join(predicate_dir, f"{predicate}.md")
        # Append so an existing marker file is not clobbered across runs.
        with open(pred_file_path, 'a') as file:
            file.write("%% Waypoint ")
            file.write("%% \n")

        for parameter in parameters:
            file_path = os.path.join(predicate_dir, f"{parameter}.md")
            with open(file_path, 'w') as file:
                file.write(f"# {parameter}\n\n")

    # Invert the arity-1 mapping once (parameter -> owning predicate) so each
    # arity-2 fact resolves in O(1) instead of rescanning every predicate's
    # parameter set per fact.  setdefault keeps the FIRST predicate that
    # claims a parameter, matching the original break-on-first-match order.
    owner = {}
    for arity_1_pred, params in arity_1_predicates.items():
        for param in params:
            owner.setdefault(param, arity_1_pred)

    # Add arity 2 predicates as links.
    for predicate, param1, param2 in arity_2_predicates:
        arity_1_pred = owner.get(param1)
        if arity_1_pred is not None:
            param_file_path = os.path.join(output_dir, arity_1_pred, f"{param1}.md")
        else:
            uncategorized_dir = os.path.join(output_dir, "uncategorized")
            print(f"Warning: No matching arity 1 predicate found for {param1} in {predicate}({param1}, {param2}). Saving in {uncategorized_dir}")
            os.makedirs(uncategorized_dir, exist_ok=True)
            param_file_path = os.path.join(uncategorized_dir, f"{param1}.md")
        with open(param_file_path, 'a') as file:
            file.write(f"{predicate}::[[{param2}]]\n")

    print("Folders and files created successfully!")
||
def main():
    """Entry point: parse the Prolog file and emit the Markdown note tree."""
    input_file = "./test.pl"  # Change this to your Prolog file name
    output_dir = "output"     # Change this to your desired output directory

    unary_facts, binary_facts = parse_prolog_file(input_file)
    create_folders_and_files(unary_facts, binary_facts, output_dir)

    print("Folders and files created successfully!")


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
import openai
import os
from textwrap import wrap

# Read the API key from the environment rather than hard-coding a secret in
# source control; the literal fallback preserves the original placeholder so
# existing behaviour is unchanged when the variable is unset.
openai.api_key = os.getenv('OPENAI_API_KEY', 'keyhere')
|
||
def split_text(text, chunk_size):
    """Break *text* into whitespace-respecting pieces of at most *chunk_size* chars."""
    # textwrap keeps words intact; break_long_words=False never splits a token.
    return wrap(text, width=chunk_size, break_long_words=False)
|
||
def call_gpt_api(chunk, prompt):
    """Send one text chunk to the chat model; return the reply text.

    On any failure (network, auth, malformed response) the exception is
    swallowed and an ``"Error: ..."`` string is returned instead, so the
    caller's batch loop keeps going.
    """
    messages = [
        {"role": "system", "content": prompt},
        {"role": "user", "content": chunk},
    ]
    try:
        response = openai.ChatCompletion.create(
            model="gpt-4o-mini",
            messages=messages,
        )
        return response.choices[0].message['content']
    except Exception as e:
        return f"Error: {str(e)}"
|
||
def process_text(input_file, output_file, chunk_size, prompt):
    """Process the input text file and save results to the output file."""
    with open(input_file, 'r') as src:
        chunks = split_text(src.read(), chunk_size)

    total = len(chunks)
    with open(output_file, 'w') as dst:
        for index, chunk in enumerate(chunks, start=1):
            print(f"Processing chunk {index}/{total}")
            reply = call_gpt_api(chunk, prompt)
            dst.write(f"% Chunk {index}\n{reply}\n\n")
|
||
# Example usage
input_file = 'input.txt'
output_file = 'output.txt'
chunk_size = 2000  # Adjust this based on your needs and API limitations
prompt = '''can you parse concepts and relationships from this text into prolog code? Please use the predicates below. When a predicate is missing, create a new one of arity 1 or 2. You will likely need to create more relations of arity 2. Please respond with prolog code only.
abstract_concept/1
physical_entity/1
country/1
person/1
theoretical_framework/1
model/1
parent/2
category/2
property/2
has/2
contributes/2
Text:'''

# Guard the run so importing this module for its helpers does not kick off
# a billable API batch job as an import-time side effect.
if __name__ == "__main__":
    process_text(input_file, output_file, chunk_size, prompt)
5 changes: 5 additions & 0 deletions
5
...Encouraging results for Knowledge Graph Extraction by LLM Ontology-prompting.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
[🇿](zotero://select/library/items/I9GKP7GD) | ||
|
||
[[Entries/Individuals/Peter Lawrence, answering users' data questions]] | ||
# Encouraging results for Knowledge Graph Extraction by LLM Ontology-prompting (2023) | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
[🇿](zotero://select/library/items/GNA7T4FQ) | ||
|
||
[[Entries/Individuals/J. Harry Caufield]] [[Entries/Individuals/Harshad Hegde]] [[Entries/Individuals/Vincent Emonet]] [[Entries/Individuals/Nomi L. Harris]] [[Entries/Individuals/Marcin P. Joachimiak]] [[Entries/Individuals/Nicolas Matentzoglu]] [[Entries/Individuals/HyeongSik Kim]] [[Entries/Individuals/Sierra A.T. Moxon]] [[Entries/Individuals/Justin T. Reese]] [[Entries/Individuals/Melissa A. Haendel]] [[Entries/Individuals/Peter N. Robinson]] [[Entries/Individuals/Christopher J. Mungall]] | ||
# OntoGPT (2024) | ||
|
5 changes: 5 additions & 0 deletions
5
...eferences/conferencePaper/OLaLa Ontology Matching with Large Language Models.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
[🇿](zotero://select/library/items/EQ38M8HE) | ||
|
||
[[Entries/Individuals/Sven Hertling]] [[Entries/Individuals/Heiko Paulheim]] | ||
# OLaLa: Ontology Matching with Large Language Models (2023) | ||
|
5 changes: 5 additions & 0 deletions
5
...tured information extraction from scientific text with large language models.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
[🇿](zotero://select/library/items/INL5CWJC) | ||
|
||
[[Entries/Individuals/John Dagdelen]] [[Entries/Individuals/Alexander Dunn]] [[Entries/Individuals/Sanghoon Lee]] [[Entries/Individuals/Nicholas Walker]] [[Entries/Individuals/Andrew S. Rosen]] [[Entries/Individuals/Gerbrand Ceder]] [[Entries/Individuals/Kristin A. Persson]] [[Entries/Individuals/Anubhav Jain]] | ||
# Structured information extraction from scientific text with large language models (2024) | ||
|
5 changes: 5 additions & 0 deletions
5
.../preprint/Enhancing Knowledge Graph Construction Using Large Language Models.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
[🇿](zotero://select/library/items/46X2B48B) | ||
|
||
[[Entries/Individuals/Milena Trajanoska]] [[Entries/Individuals/Riste Stojanov]] [[Entries/Individuals/Dimitar Trajanov]] | ||
# Enhancing Knowledge Graph Construction Using Large Language Models (2023) | ||
|
7 changes: 7 additions & 0 deletions
7
...cs (SPIRES) A method for populating knowledge bases using zero-shot learning.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
[🇿](zotero://select/library/items/43JB77SY) | ||
|
||
[[Entries/Individuals/J. Harry Caufield]] [[Entries/Individuals/Harshad Hegde]] [[Entries/Individuals/Vincent Emonet]] [[Entries/Individuals/Nomi L. Harris]] [[Entries/Individuals/Marcin P. Joachimiak]] [[Entries/Individuals/Nicolas Matentzoglu]] [[Entries/Individuals/HyeongSik Kim]] [[Entries/Individuals/Sierra A. T. Moxon]] [[Entries/Individuals/Justin T. Reese]] [[Entries/Individuals/Melissa A. Haendel]] [[Entries/Individuals/Peter N. Robinson]] [[Entries/Individuals/Christopher J. Mungall]] | ||
# Structured prompt interrogation and recursive extraction of semantics (SPIRES): A method for populating knowledge bases using zero-shot learning (2023) | ||
|
||
Comment: Updated 2023-12-22 | ||
|
5 changes: 5 additions & 0 deletions
5
...ing Large Language Models for Ontology Extraction through Question-Answering.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
[🇿](zotero://select/library/items/RARZ7ZYI) | ||
|
||
|
||
# Leveraging Large Language Models for Ontology Extraction through Question-Answering | ||
|