Skip to content

Commit

Permalink
Quartz sync: Aug 9, 2024, 6:16 PM
Browse files Browse the repository at this point in the history
  • Loading branch information
saviorand committed Aug 9, 2024
1 parent de69f16 commit 84f927c
Show file tree
Hide file tree
Showing 9 changed files with 117 additions and 41 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ replit.nix
.vscode
content/excalibrain.md
content/Scripts/.env
StructuredReferences/
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[🇿](zotero://select/library/items/LUXMDBMB)

[[Entries/Individuals/Cesar Hidalgo]]
# Why Information Grows: The Evolution of Order, from Atoms to Economies (2015)

Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[🇿](zotero://select/library/items/M4HCQDKN)

[[Entries/Individuals/Nils Gilman]]
# The New International Economic Order: A Reintroduction (2015)

Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[🇿](zotero://select/library/items/DXEZ96HF)


# Ontology Design Patterns . org (ODP) - Odp

19 changes: 19 additions & 0 deletions content/Scripts/file_to_text.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import sys
import pylibmagic
from unstructured.partition.auto import partition

def file_to_text(file_path, txt_path):
    """Partition `file_path` and dump the resulting elements as plain text.

    The input is handed to `partition(filename=...)`; every element it
    returns is converted with `str()` and written to `txt_path`, one
    element per line. The output file is opened in 'w' mode, so any
    existing content is replaced.
    """
    parsed = partition(filename=file_path)
    with open(txt_path, 'w') as out:
        # One write per element: its string form plus a trailing newline.
        out.writelines(str(item) + "\n" for item in parsed)

# Command-line entry point: expects the input document path followed by
# the path of the text file to write.
if len(sys.argv) < 3:
    print("Usage: python file_to_text.py input_file output_txt")
    sys.exit(1)

# The first argument must be bound to the same name that is passed to
# file_to_text() below; binding it as `pdf_path` while calling with
# `file_path` raised NameError on every run.
file_path = sys.argv[1]
txt_path = sys.argv[2]

file_to_text(file_path, txt_path)
17 changes: 0 additions & 17 deletions content/Scripts/pdf_to_txt.py

This file was deleted.

35 changes: 31 additions & 4 deletions content/Scripts/prompts.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,36 @@
def correctness_check_prompt(domain_subjects):
return f'''You are a domain expert in the field of {domain_subjects}.
Check the Prolog code for correctness based on the text. Ensure all relationships are logically sound and perfectly consistent with the text.
def correctness_check_prompt():
# optionally add You are a domain expert in the field of {domain_subjects}.
return f'''Check the Prolog code for correctness and completeness based on the text. Ensure all relationships are logically sound and perfectly consistent with the text.
If you find any inconsistencies, correct them in the Prolog code.
If anything is missing, add missing predicates.
Please ONLY use the following predicates from the GFO ontology:
Please respond with prolog code only.'''
Classes:
Abstract Action Amount of substrate Awareness level Biological level Category Change Chemical level Chronoid Concept Concrete Configuration Configuroid Continuous Continuous change Continuous process Dependent Discrete Discrete presential Discrete process Entity Extrinsic change Function History Independent Individual Instantanuous change Intrinsic change Item Level Line Mass entity Material boundary Material line Material object Material persistant Material point Material stratum Material structure Material surface Mental stratum Occurrent Ontological layer Persistant Personality level Physical level Point Presential Process Processual role Property Property value Relational role Relator Role Set Situation Situoid Social role Social stratum Space Space time Spatial boundary Spatial region State Stratum Surface Symbol Symbol sequence Symbol structure Temporal region Time Time boundary Token Topoid Universal Value space
Object Properties:
abstract has part abstract part of agent in boundary of categorial part of category in layer caused by causes constituent part of depends on exists at framed by frames function determinant of functional item of goal of has boundary has categorial part has category has constituent part has function has function determinant has functional item has goal has left time boundary has member has part has participant has proper part has requirement has right time boundary has sequence constituent has spatial boundary has time boundary has token has value instance of instantiated by layer of left boundary of level of member of necessary for occupied by occupies on layer on level on stratum part of participates in plays role projection of projects to proper part of realized by realizes requirement of right boundary of role of sequence constituent of spatial boundary of stratum of time boundary of value of
Please respond with prolog code only.
'''

# Prompt for translating text into Prolog facts using arity-2 predicates
# only. The model is told to restrict itself to the vocabulary listed in
# the string: a "Classes" list and an "Object Properties" list. The
# spellings in those lists (e.g. "Instantanuous", "Persistant") are part
# of the runtime prompt and are left exactly as given.
# NOTE(review): the string ends with "Text:", which suggests the source
# text is expected to follow the prompt -- confirm against the caller.
arity_two_prompt = '''You are an expert at creating Knowledge Graphs in Prolog.
Translate sentences in the text into Prolog code using predicates of arity 2.
Arity 2 predicates define relationships (verbs) between nouns, they are provided below.
You can ONLY use the following predicates:
Classes:
Abstract Action Amount of substrate Awareness level Biological level Category Change Chemical level Chronoid Concept Concrete Configuration Configuroid Continuous Continuous change Continuous process Dependent Discrete Discrete presential Discrete process Entity Extrinsic change Function History Independent Individual Instantanuous change Intrinsic change Item Level Line Mass entity Material boundary Material line Material object Material persistant Material point Material stratum Material structure Material surface Mental stratum Occurrent Ontological layer Persistant Personality level Physical level Point Presential Process Processual role Property Property value Relational role Relator Role Set Situation Situoid Social role Social stratum Space Space time Spatial boundary Spatial region State Stratum Surface Symbol Symbol sequence Symbol structure Temporal region Time Time boundary Token Topoid Universal Value space
Object Properties:
abstract has part abstract part of agent in boundary of categorial part of category in layer caused by causes constituent part of depends on exists at framed by frames function determinant of functional item of goal of has boundary has categorial part has category has constituent part has function has function determinant has functional item has goal has left time boundary has member has part has participant has proper part has requirement has right time boundary has sequence constituent has spatial boundary has time boundary has token has value instance of instantiated by layer of left boundary of level of member of necessary for occupied by occupies on layer on level on stratum part of participates in plays role projection of projects to proper part of realized by realizes requirement of right boundary of role of sequence constituent of spatial boundary of stratum of time boundary of value of
Please respond with prolog code only.
Text:
'''

relation_prompt = '''You are an expert at creating Knowledge Graphs in Prolog.
Translate sentences in the text into Prolog code using predicates of arity 2.
Expand Down
27 changes: 14 additions & 13 deletions content/Scripts/text_to_prolog.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,30 @@
import sys
from utils import parse_prolog_predicates, call_gpt_api, relation_correctness_check, file_to_chunks, text_to_relations, prolog_predicates_to_entities, entities_to_categorized_entities
from prompts import relation_prompt, categories_prompt, correctness_check_prompt
from prompts import relation_prompt, categories_prompt, correctness_check_prompt, arity_two_prompt

if len(sys.argv) < 3:
print("Usage: python text_to_prolog.py input_file domain_subjects")
print("Usage: python text_to_prolog.py input_file output_file domain_subjects")
sys.exit(1)

input_file = sys.argv[1] # e.g. 'nieo.txt'
domain_subjects = sys.argv[2] # e.g. 'NIEO, international relations, economics'
name = input_file.split('.')[0]
output_file = sys.argv[2] # e.g. 'nieo.pl'
# domain_subjects = sys.argv[3] # e.g. 'NIEO, international relations, economics'
output_file_noext = output_file.split('.')[0]

original_text_chunk_size = 2000
original_text_chunks = file_to_chunks(input_file, original_text_chunk_size)

relation_output_file = f'{name}_relations.pl'
entities_output_file = f'{name}_entities.pl'
categories_output_file = f'{name}_categories.pl'
relation_output_file = f'{output_file_noext}_relations.pl'
entities_output_file = f'{output_file_noext}_entities.pl'
categories_output_file = f'{output_file_noext}_categories.pl'

output_relations = text_to_relations(original_text_chunks, relation_output_file, relation_prompt, correctness_check_prompt(domain_subjects))
output_relations = text_to_relations(original_text_chunks, relation_output_file, arity_two_prompt, correctness_check_prompt())

entity_predicates = parse_prolog_predicates(output_relations)
# entity_predicates = parse_prolog_predicates(output_relations)

prolog_predicates_to_entities(entity_predicates, entities_output_file)
# prolog_predicates_to_entities(entity_predicates, entities_output_file)

entities_chunk_size = 2000
entities_chunks = file_to_chunks(entities_output_file, entities_chunk_size)
# entities_chunk_size = 2000
# entities_chunks = file_to_chunks(entities_output_file, entities_chunk_size)

entities_to_categorized_entities(entities_chunks, categories_output_file, categories_prompt)
# entities_to_categorized_entities(entities_chunks, categories_output_file, categories_prompt)
44 changes: 37 additions & 7 deletions content/Scripts/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,14 @@
from textwrap import wrap
from openai import OpenAI
from dotenv import load_dotenv
import anthropic

load_dotenv()

client = OpenAI(
claude_client = anthropic.Anthropic(
api_key=os.getenv("ANTHROPIC_API_KEY"),
)
gpt_client = OpenAI(
api_key=os.getenv("OPENAI_API_KEY"),
)

Expand All @@ -20,7 +24,7 @@ def file_to_chunks(input_file, chunk_size):
def call_gpt_api(chunk, prompt):
"""Call the GPT API with the given chunk and prompt."""
try:
response = client.chat.completions.create(
response = gpt_client.chat.completions.create(
messages=[
{
"role": "system",
Expand All @@ -37,6 +41,30 @@ def call_gpt_api(chunk, prompt):
except Exception as e:
return f"Error: {str(e)}"

def call_claude_api(chunk, prompt):
    """Call the Claude API with the given chunk and prompt.

    `prompt` is sent as the system prompt and `chunk` as a single
    user message made of one text block. Returns the text of the first
    content block of the reply. If anything raises, the exception is
    not propagated; a string of the form "Error: <message>" is
    returned instead.
    """
    user_turn = {
        "role": "user",
        "content": [{"type": "text", "text": chunk}],
    }
    try:
        reply = claude_client.messages.create(
            model="claude-3-5-sonnet-20240620",
            system=prompt,
            messages=[user_turn],
            # Output budget is tied to the character length of the input chunk.
            max_tokens=len(chunk),
            temperature=0,
        )
        return reply.content[0].text
    except Exception as err:
        return f"Error: {str(err)}"


def relation_correctness_check(text, prolog_code, prompt):
correctness_check_prompt = f'''{prompt}
Expand All @@ -45,18 +73,20 @@ def relation_correctness_check(text, prolog_code, prompt):
Original Text:'''

return call_gpt_api(text, correctness_check_prompt)
# return call_gpt_api(text, correctness_check_prompt)
return call_claude_api(text, correctness_check_prompt)

def text_to_relations(chunks, output_file, prompt, correctness_check_prompt):
"""Extract relations from a list of chunks and save to output file."""
complete_output = ""
with open(output_file, 'w') as f:
for i, chunk in enumerate(chunks):
print(f"Processing relations chunk {i+1}/{len(chunks)}")
preliminary_result = call_gpt_api(chunk, prompt)
result = relation_correctness_check(chunk, preliminary_result, correctness_check_prompt)
f.write(f"% Chunk {i+1}\n{result}\n\n")
complete_output += result + "\n"
# preliminary_result = call_gpt_api(chunk, prompt)
preliminary_result = call_claude_api(chunk, prompt)
# result = relation_correctness_check(chunk, preliminary_result, correctness_check_prompt)
f.write(f"% Chunk {i+1}\n{preliminary_result}\n\n")
complete_output += preliminary_result + "\n"

return complete_output

Expand Down

0 comments on commit 84f927c

Please sign in to comment.