import os
import re


def parse_prolog_file(file_path):
    """Parse a Prolog facts file into arity-1 and arity-2 predicates.

    Args:
        file_path: Path to a Prolog source file containing ground facts
            such as ``person(alice).`` and ``parent(alice, bob).``.

    Returns:
        A tuple ``(arity_1_predicates, arity_2_predicates)`` where the
        first element maps each arity-1 predicate name to the set of its
        parameters, and the second is a list of
        ``(predicate, param1, param2)`` tuples for arity-2 facts.
    """
    # Explicit encoding so parsing is stable across platforms.
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    arity_1_predicates = {}
    arity_2_predicates = []

    # Facts of the form predicate(parameter).
    for match in re.finditer(r'(\w+)\((\w+)\)\.', content):
        predicate, parameter = match.groups()
        arity_1_predicates.setdefault(predicate, set()).add(parameter)

    # Facts of the form predicate(param1, param2).
    for match in re.finditer(r'(\w+)\((\w+),\s*(\w+)\)\.', content):
        arity_2_predicates.append(match.groups())

    return arity_1_predicates, arity_2_predicates


def create_folders_and_files(arity_1_predicates, arity_2_predicates, output_dir):
    """Materialise parsed predicates as a folder of Markdown notes.

    Each arity-1 predicate becomes a folder containing one note per
    parameter plus a ``<predicate>.md`` waypoint file; each arity-2 fact
    becomes a ``predicate::[[param2]]`` link appended to param1's note.

    Args:
        arity_1_predicates: Mapping of predicate name -> set of parameters.
        arity_2_predicates: List of (predicate, param1, param2) tuples.
        output_dir: Directory the tree is created in (created if missing).
    """
    os.makedirs(output_dir, exist_ok=True)

    # One folder per arity-1 predicate, one note per parameter.
    for predicate, parameters in arity_1_predicates.items():
        predicate_dir = os.path.join(output_dir, predicate)
        os.makedirs(predicate_dir, exist_ok=True)
        pred_file_path = os.path.join(predicate_dir, f"{predicate}.md")
        # Write the waypoint marker only once; the original opened this
        # file in mode 'a', which appended a duplicate marker on every run.
        if not os.path.exists(pred_file_path):
            with open(pred_file_path, 'w', encoding='utf-8') as file:
                file.write("%% Waypoint ")
                file.write("%% \n")

        for parameter in parameters:
            file_path = os.path.join(predicate_dir, f"{parameter}.md")
            with open(file_path, 'w', encoding='utf-8') as file:
                file.write(f"# {parameter}\n\n")

    # Arity-2 facts become predicate::[[param2]] links in param1's note.
    for predicate, param1, param2 in arity_2_predicates:
        # Find which arity-1 predicate folder holds param1's note.
        for arity_1_pred, params in arity_1_predicates.items():
            if param1 in params:
                param_file_path = os.path.join(output_dir, arity_1_pred, f"{param1}.md")
                break
        else:
            # param1 was never declared by an arity-1 fact; park the link
            # in an "uncategorized" folder instead of dropping it.
            uncategorized_dir = os.path.join(output_dir, "uncategorized")
            print(f"Warning: No matching arity 1 predicate found for {param1} "
                  f"in {predicate}({param1}, {param2}). Saving in {uncategorized_dir}")
            os.makedirs(uncategorized_dir, exist_ok=True)
            param_file_path = os.path.join(uncategorized_dir, f"{param1}.md")
        # Single append site for both branches (deduplicated from original).
        with open(param_file_path, 'a', encoding='utf-8') as file:
            file.write(f"{predicate}::[[{param2}]]\n")


def main():
    """Entry point: convert a Prolog facts file into the folder tree."""
    input_file = "./test.pl"  # Change this to your Prolog file name
    output_dir = "output"  # Change this to your desired output directory

    arity_1_predicates, arity_2_predicates = parse_prolog_file(input_file)
    create_folders_and_files(arity_1_predicates, arity_2_predicates, output_dir)

    # Success message is printed exactly once, here (the original also
    # printed it inside create_folders_and_files, duplicating the output).
    print("Folders and files created successfully!")


if __name__ == "__main__":
    main()
import openai
import os
from textwrap import wrap

# Read the API key from the environment rather than hard-coding a secret
# in the source file (the original embedded a placeholder key literal).
openai.api_key = os.environ.get('OPENAI_API_KEY', 'keyhere')

def split_text(text, chunk_size):
    """Split the text into chunks of at most chunk_size characters.

    Words are kept intact (break_long_words=False), so a single word
    longer than chunk_size may exceed the limit.
    """
    return wrap(text, chunk_size, break_long_words=False)

def call_gpt_api(chunk, prompt):
    """Call the GPT API with the given chunk and system prompt.

    Returns the model's reply text, or an ``"Error: ..."`` string on
    failure so one bad chunk does not abort the whole run.
    """
    try:
        response = openai.ChatCompletion.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": prompt},
                {"role": "user", "content": chunk}
            ]
        )
        return response.choices[0].message['content']
    except Exception as e:
        # Deliberate best-effort: record the failure inline and continue.
        return f"Error: {str(e)}"

def process_text(input_file, output_file, chunk_size, prompt):
    """Process the input text file and save results to the output file.

    Each chunk's result is written under a ``% Chunk N`` Prolog comment.
    """
    with open(input_file, 'r', encoding='utf-8') as f:
        text = f.read()

    chunks = split_text(text, chunk_size)

    with open(output_file, 'w', encoding='utf-8') as f:
        for i, chunk in enumerate(chunks):
            print(f"Processing chunk {i+1}/{len(chunks)}")
            result = call_gpt_api(chunk, prompt)
            f.write(f"% Chunk {i+1}\n{result}\n\n")

# Example usage
input_file = 'input.txt'
output_file = 'output.txt'
chunk_size = 2000  # Adjust this based on your needs and API limitations
prompt = '''can you parse concepts and relationships from this text into prolog code? Please use the predicates below. When a predicate is missing, create a new one of arity 1 or 2. You will likely need to create more relations of arity 2. Please respond with prolog code only.

abstract_concept/1
physical_entity/1
country/1
person/1
theoretical_framework/1
model/1

parent/2
category/2
property/2
has/2
contributes/2

Text:'''

if __name__ == "__main__":
    # Guarded so importing this module does not immediately read input.txt
    # and start billing API calls (the original ran at import time).
    process_text(input_file, output_file, chunk_size, prompt)
Robinson]] [[Entries/Individuals/Christopher J. Mungall]] +# OntoGPT (2024) + diff --git a/content/References/conferencePaper/OLaLa Ontology Matching with Large Language Models.md b/content/References/conferencePaper/OLaLa Ontology Matching with Large Language Models.md new file mode 100644 index 0000000..e818f3e --- /dev/null +++ b/content/References/conferencePaper/OLaLa Ontology Matching with Large Language Models.md @@ -0,0 +1,5 @@ +[🇿](zotero://select/library/items/EQ38M8HE) + +[[Entries/Individuals/Sven Hertling]] [[Entries/Individuals/Heiko Paulheim]] +# OLaLa: Ontology Matching with Large Language Models (2023) + diff --git a/content/References/journalArticle/Structured information extraction from scientific text with large language models.md b/content/References/journalArticle/Structured information extraction from scientific text with large language models.md new file mode 100644 index 0000000..3ee0798 --- /dev/null +++ b/content/References/journalArticle/Structured information extraction from scientific text with large language models.md @@ -0,0 +1,5 @@ +[🇿](zotero://select/library/items/INL5CWJC) + +[[Entries/Individuals/John Dagdelen]] [[Entries/Individuals/Alexander Dunn]] [[Entries/Individuals/Sanghoon Lee]] [[Entries/Individuals/Nicholas Walker]] [[Entries/Individuals/Andrew S. Rosen]] [[Entries/Individuals/Gerbrand Ceder]] [[Entries/Individuals/Kristin A. 
Persson]] [[Entries/Individuals/Anubhav Jain]] +# Structured information extraction from scientific text with large language models (2024) + diff --git a/content/References/preprint/Enhancing Knowledge Graph Construction Using Large Language Models.md b/content/References/preprint/Enhancing Knowledge Graph Construction Using Large Language Models.md new file mode 100644 index 0000000..a6cb080 --- /dev/null +++ b/content/References/preprint/Enhancing Knowledge Graph Construction Using Large Language Models.md @@ -0,0 +1,5 @@ +[🇿](zotero://select/library/items/46X2B48B) + +[[Entries/Individuals/Milena Trajanoska]] [[Entries/Individuals/Riste Stojanov]] [[Entries/Individuals/Dimitar Trajanov]] +# Enhancing Knowledge Graph Construction Using Large Language Models (2023) + diff --git a/content/References/preprint/Structured prompt interrogation and recursive extraction of semantics (SPIRES) A method for populating knowledge bases using zero-shot learning.md b/content/References/preprint/Structured prompt interrogation and recursive extraction of semantics (SPIRES) A method for populating knowledge bases using zero-shot learning.md new file mode 100644 index 0000000..3c1e71c --- /dev/null +++ b/content/References/preprint/Structured prompt interrogation and recursive extraction of semantics (SPIRES) A method for populating knowledge bases using zero-shot learning.md @@ -0,0 +1,7 @@ +[🇿](zotero://select/library/items/43JB77SY) + +[[Entries/Individuals/J. Harry Caufield]] [[Entries/Individuals/Harshad Hegde]] [[Entries/Individuals/Vincent Emonet]] [[Entries/Individuals/Nomi L. Harris]] [[Entries/Individuals/Marcin P. Joachimiak]] [[Entries/Individuals/Nicolas Matentzoglu]] [[Entries/Individuals/HyeongSik Kim]] [[Entries/Individuals/Sierra A. T. Moxon]] [[Entries/Individuals/Justin T. Reese]] [[Entries/Individuals/Melissa A. Haendel]] [[Entries/Individuals/Peter N. Robinson]] [[Entries/Individuals/Christopher J. 
Mungall]] +# Structured prompt interrogation and recursive extraction of semantics (SPIRES): A method for populating knowledge bases using zero-shot learning (2023) + +Comment: Updated 2023-12-22 + diff --git a/content/References/webpage/Leveraging Large Language Models for Ontology Extraction through Question-Answering.md b/content/References/webpage/Leveraging Large Language Models for Ontology Extraction through Question-Answering.md new file mode 100644 index 0000000..cb5835f --- /dev/null +++ b/content/References/webpage/Leveraging Large Language Models for Ontology Extraction through Question-Answering.md @@ -0,0 +1,5 @@ +[🇿](zotero://select/library/items/RARZ7ZYI) + + +# Leveraging Large Language Models for Ontology Extraction through Question-Answering +