Various small cleanups #220

Merged 1 commit · Apr 21, 2023
babyagi.py (29 additions, 21 deletions)
@@ -20,14 +20,14 @@
 OPENAI_API_KEY = os.getenv("OPENAI_API_KEY", "")
 assert OPENAI_API_KEY, "OPENAI_API_KEY environment variable is missing from .env"
 
-OPENAI_API_MODEL = os.getenv("OPENAI_API_MODEL", "gpt-3.5-turbo")
+LLM_MODEL = os.getenv("LLM_MODEL", os.getenv("OPENAI_API_MODEL", "gpt-3.5-turbo"))
 
 # Table config
-YOUR_TABLE_NAME = os.getenv("TABLE_NAME", "")
-assert YOUR_TABLE_NAME, "TABLE_NAME environment variable is missing from .env"
+RESULTS_STORE_NAME = os.getenv("RESULTS_STORE_NAME", os.getenv("TABLE_NAME", ""))
+assert RESULTS_STORE_NAME, "RESULTS_STORE_NAME environment variable is missing from .env"
 
 # Run configuration
-BABY_NAME = os.getenv("BABY_NAME", "BabyAGI")
+INSTANCE_NAME = os.getenv("INSTANCE_NAME", os.getenv("BABY_NAME", "BabyAGI"))
 COOPERATIVE_MODE = "none"
 JOIN_EXISTING_OBJECTIVE = False
 
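The renamed variables stay backward compatible by nesting os.getenv calls: the new name wins, the legacy name is the fallback, and a literal default comes last. A minimal sketch of the pattern, with a hypothetical helper name:

    import os

    def getenv_compat(new_name: str, legacy_name: str, default: str) -> str:
        # Prefer the new variable, fall back to the legacy one, then the default.
        return os.getenv(new_name, os.getenv(legacy_name, default))

    # Resolves LLM_MODEL first, then OPENAI_API_MODEL, then the literal default.
    model = getenv_compat("LLM_MODEL", "OPENAI_API_MODEL", "gpt-3.5-turbo")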
@@ -57,32 +57,33 @@ def can_import(module_name):
 if ENABLE_COMMAND_LINE_ARGS:
     if can_import("extensions.argparseext"):
         from extensions.argparseext import parse_arguments
-        OBJECTIVE, INITIAL_TASK, OPENAI_API_MODEL, DOTENV_EXTENSIONS, BABY_NAME, COOPERATIVE_MODE, JOIN_EXISTING_OBJECTIVE = parse_arguments()
+        OBJECTIVE, INITIAL_TASK, LLM_MODEL, DOTENV_EXTENSIONS, INSTANCE_NAME, COOPERATIVE_MODE, JOIN_EXISTING_OBJECTIVE = parse_arguments()
 
 # Load additional environment variables for enabled extensions
+# TODO: This might override the following command line arguments as well:
+# OBJECTIVE, INITIAL_TASK, LLM_MODEL, INSTANCE_NAME, COOPERATIVE_MODE, JOIN_EXISTING_OBJECTIVE
 if DOTENV_EXTENSIONS:
     if can_import("extensions.dotenvext"):
         from extensions.dotenvext import load_dotenv_extensions
 
         load_dotenv_extensions(DOTENV_EXTENSIONS)
 
 
 # TODO: There's still work to be done here to enable people to get
-# defaults from dotenv extensions # but also provide command line
+# defaults from dotenv extensions, but also provide command line
 # arguments to override them
 
 # Extensions support end
 
 print("\033[95m\033[1m"+"\n*****CONFIGURATION*****\n"+"\033[0m\033[0m")
-print(f"Name: {BABY_NAME}")
-print(f"LLM : {OPENAI_API_MODEL}")
+print(f"Name: {INSTANCE_NAME}")
+print(f"LLM : {LLM_MODEL}")
 print(f"Mode: {'alone' if COOPERATIVE_MODE in ['n', 'none'] else 'local' if COOPERATIVE_MODE in ['l', 'local'] else 'distributed' if COOPERATIVE_MODE in ['d', 'distributed'] else 'undefined'}")
 
 # Check if we know what we are doing
 assert OBJECTIVE, "OBJECTIVE environment variable is missing from .env"
 assert INITIAL_TASK, "INITIAL_TASK environment variable is missing from .env"
 
-if "gpt-4" in OPENAI_API_MODEL.lower():
+if "gpt-4" in LLM_MODEL.lower():
     print(
         "\033[91m\033[1m"
         + "\n*****USING GPT-4. POTENTIALLY EXPENSIVE. MONITOR YOUR COSTS*****"
@@ -101,6 +102,8 @@ def can_import(module_name):
 # Results storage using local ChromaDB
 class DefaultResultsStorage:
     def __init__(self):
+        import logging
+        logging.getLogger('chromadb').setLevel(logging.ERROR)
         # Create Chroma collection
         chroma_persist_dir = "chroma"
         chroma_client = chromadb.Client(
@@ -110,11 +113,10 @@ def __init__(self):
             )
         )
 
-        table_name = YOUR_TABLE_NAME
         metric = "cosine"
         embedding_function = OpenAIEmbeddingFunction(api_key=OPENAI_API_KEY)
         self.collection = chroma_client.get_or_create_collection(
-            name=table_name,
+            name=RESULTS_STORE_NAME,
             metadata={"hnsw:space": metric},
             embedding_function=embedding_function,
         )
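The two lines added to __init__ silence Chroma's logger before the client is built. The same pattern quiets any chatty third-party library; a minimal standalone sketch:

    import logging

    # Raise the threshold on the library's logger so only errors get through.
    # The logger name must match the library's ('chromadb' here, per the diff).
    logging.getLogger('chromadb').setLevel(logging.ERROR)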
@@ -133,14 +135,16 @@ def add(self, task: Dict, result: Dict, result_id: int, vector: List):
             ids=result_id,
             documents=vector,
             metadatas={"task": task["task_name"], "result": result},
-        ) # Sleep before checking the task list again
+        )
 
     def query(self, query: str, top_results_num: int) -> List[dict]:
         count: int = self.collection.count()
         if count == 0:
             return []
         results = self.collection.query(
-            query_texts=query, n_results=min(top_results_num, count), include=["metadatas"]
+            query_texts=query,
+            n_results=min(top_results_num, count),
+            include=["metadatas"]
         )
         return [item["task"] for item in results["metadatas"][0]]
 
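query clamps n_results to the collection size because Chroma raises an error when asked for more results than the collection holds. A hedged usage sketch, assuming a storage instance that currently holds three results:

    # storage = DefaultResultsStorage(); three results already added
    context = storage.query(query="current objective", top_results_num=5)
    # n_results is clamped to min(5, 3), so the call succeeds and returns
    # at most three task names; an empty collection short-circuits to [].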
@@ -154,7 +158,7 @@ def query(self, query: str, top_results_num: int) -> List[dict]:
         PINECONE_ENVIRONMENT
     ), "PINECONE_ENVIRONMENT environment variable is missing from .env"
     from extensions.pinecone_storage import PineconeResultsStorage
-    results_storage = PineconeResultsStorage(OPENAI_API_KEY, PINECONE_API_KEY, PINECONE_ENVIRONMENT, YOUR_TABLE_NAME, OBJECTIVE)
+    results_storage = PineconeResultsStorage(OPENAI_API_KEY, PINECONE_API_KEY, PINECONE_ENVIRONMENT, RESULTS_STORE_NAME, OBJECTIVE)
     print("\nReplacing results storage: " + "\033[93m\033[1m" + "Pinecone" + "\033[0m\033[0m")
 
 # Task storage supporting only a single instance of BabyAGI
@@ -199,7 +203,7 @@ def get_task_names(self):
 
 def openai_call(
     prompt: str,
-    model: str = OPENAI_API_MODEL,
+    model: str = LLM_MODEL,
     temperature: float = OPENAI_TEMPERATURE,
     max_tokens: int = 100,
 ):
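One subtlety of renaming a module-level default like this: Python evaluates default arguments once, when the def executes, so openai_call captures whatever LLM_MODEL holds at that moment (the CLI override earlier in the file has already run by then). A small illustration with hypothetical names:

    DEFAULT_MODEL = "gpt-3.5-turbo"

    def call(model: str = DEFAULT_MODEL) -> str:
        return model

    DEFAULT_MODEL = "gpt-4"
    print(call())  # still "gpt-3.5-turbo": the default was captured at def time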
@@ -373,18 +377,21 @@ def main ():
         print("\033[93m\033[1m" + "\n*****TASK RESULT*****\n" + "\033[0m\033[0m")
         print(result)
 
-        # Step 2: Enrich result and store in Chroma
+        # Step 2: Enrich result and store in the results storage
+        # This is where you should enrich the result if needed
         enriched_result = {
             "data": result
-        } # This is where you should enrich the result if needed
+        }
+        # extract the actual result from the dictionary
+        # since we don't do enrichment currently
+        vector = enriched_result["data"]
 
         result_id = f"result_{task['task_id']}"
-        vector = enriched_result[
-            "data"
-        ] # extract the actual result from the dictionary
 
         results_storage.add(task, result, result_id, vector)
 
         # Step 3: Create new tasks and reprioritize task list
+        # only the main instance in cooperative mode does that
        new_tasks = task_creation_agent(
             OBJECTIVE,
             enriched_result,
@@ -398,6 +405,7 @@ def main ():
 
         if not JOIN_EXISTING_OBJECTIVE: prioritization_agent()
 
+        # Sleep a bit before checking the task list again
         time.sleep(5)
 
 if __name__ == "__main__":
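The "this is where you should enrich the result" comment marks an extension hook: nothing is enriched today, so vector is just the raw result string. A hedged sketch of what a custom enrichment step might look like (enrich_result is hypothetical, not part of this PR):

    def enrich_result(task: dict, result: str) -> dict:
        # Illustrative enrichment: attach metadata alongside the raw result
        # before it is embedded and stored.
        return {
            "data": result,
            "task_name": task["task_name"],
            "result_length": len(result),
        }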
extensions/argparseext.py (10 additions, 10 deletions)
@@ -48,9 +48,9 @@ def parse_arguments():
         if not specified, get objective from environment.
         ''', default=[os.getenv("OBJECTIVE", "")])
     parser.add_argument('-n', '--name', required=False, help='''
-        babyagi instance name.
-        if not specified, get baby_name from environment.
-        ''', default=os.getenv("BABY_NAME", "BabyAGI"))
+        instance name.
+        if not specified, get the instance name from environment.
+        ''', default=os.getenv("INSTANCE_NAME", os.getenv("BABY_NAME", "BabyAGI")))
     parser.add_argument('-m', '--mode', choices=['n', 'none', 'l', 'local', 'd', 'distributed'], help='''
         cooperative mode type
         ''', default='none')
@@ -64,10 +64,10 @@ def parse_arguments():
         install cooperative requirements.
         ''')
     group2 = parser.add_mutually_exclusive_group()
-    group2.add_argument('-4', '--gpt-4', dest='openai_api_model', action='store_const', const="gpt-4", help='''
+    group2.add_argument('-4', '--gpt-4', dest='llm_model', action='store_const', const="gpt-4", help='''
         use GPT-4 instead of the default model.
-        ''', default=os.getenv("OPENAI_API_MODEL", "gpt-3.5-turbo"))
-    group2.add_argument('-l', '--llama', dest='openai_api_model', action='store_const', const="llama", help='''
+        ''')
+    group2.add_argument('-l', '--llama', dest='llm_model', action='store_const', const="llama", help='''
         use LLaMa instead of the default model. Requires llama.cpp.
         ''')
     # This will parse -e again, which we want, because we need
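Both model flags now share dest='llm_model' inside a mutually exclusive group, and the env-var default moves out of the flag definition into an explicit resolution after parsing (next hunk). A minimal standalone sketch of the store_const pattern:

    import argparse

    parser = argparse.ArgumentParser()
    group = parser.add_mutually_exclusive_group()
    # Both flags write to the same destination; passing both is an error.
    group.add_argument('-4', '--gpt-4', dest='llm_model', action='store_const', const='gpt-4')
    group.add_argument('-l', '--llama', dest='llm_model', action='store_const', const='llama')

    args = parser.parse_args(['-4'])
    print(args.llm_model)  # 'gpt-4'; with neither flag it stays None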
@@ -81,12 +81,12 @@ def parse_arguments():
 
     args = parser.parse_args()
 
-    openai_api_model = args.openai_api_model
+    llm_model = args.llm_model if args.llm_model else os.getenv("LLM_MODEL", os.getenv("OPENAI_API_MODEL", "gpt-3.5-turbo"))
 
     dotenv_extensions = args.env
 
-    baby_name = args.name
-    if not baby_name:
+    instance_name = args.name
+    if not instance_name:
         print("\033[91m\033[1m" + "BabyAGI instance name missing\n" + "\033[0m\033[0m")
         parser.print_help()
         parser.exit()
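With the flags defaulting to None, precedence is now resolved in one place: an explicit -4/-l flag wins, then LLM_MODEL, then the legacy OPENAI_API_MODEL, then the hard-coded fallback. The same resolution in isolation:

    import os
    from typing import Optional

    def resolve_model(cli_value: Optional[str]) -> str:
        # CLI flag beats environment; the new env var beats the legacy one.
        if cli_value:
            return cli_value
        return os.getenv("LLM_MODEL", os.getenv("OPENAI_API_MODEL", "gpt-3.5-turbo"))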
@@ -120,4 +120,4 @@ def parse_arguments():
         parser.print_help()
         parser.exit()
 
-    return objective, initial_task, openai_api_model, dotenv_extensions, baby_name, cooperative_mode, join_existing_objective
+    return objective, initial_task, llm_model, dotenv_extensions, instance_name, cooperative_mode, join_existing_objective
extensions/pinecone_storage.py (5 additions, 5 deletions)
@@ -16,23 +16,23 @@ def can_import(module_name):
 ), "\033[91m\033[1m"+"Pinecone storage requires package pinecone-client.\nInstall: pip install -r extensions/requirements.txt"
 
 class PineconeResultsStorage:
-    def __init__(self, openai_api_key: str, pinecone_api_key: str, pinecone_environment: str, table_name: str, objective: str):
+    def __init__(self, openai_api_key: str, pinecone_api_key: str, pinecone_environment: str, results_store_name: str, objective: str):
         openai.api_key = openai_api_key
         pinecone.init(api_key=pinecone_api_key, environment=pinecone_environment)
 
         # Pinecone namespaces are only compatible with ascii characters (used in query and upsert)
         self.namespace = re.sub(re.compile('[^\x00-\x7F]+'), '', objective)
 
-        table_name = table_name
+        results_store_name = results_store_name
         dimension = 1536
         metric = "cosine"
         pod_type = "p1"
-        if table_name not in pinecone.list_indexes():
+        if results_store_name not in pinecone.list_indexes():
             pinecone.create_index(
-                table_name, dimension=dimension, metric=metric, pod_type=pod_type
+                results_store_name, dimension=dimension, metric=metric, pod_type=pod_type
             )
 
-        self.index = pinecone.Index(table_name)
+        self.index = pinecone.Index(results_store_name)
 
     def add(self, task: Dict, result: Dict, result_id: int, vector: List):
         enriched_result = {
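A side note on the constructor this hunk touches: Pinecone namespaces accept only ASCII, so the objective is sanitized with a regex before use. The same sanitization in isolation:

    import re

    def to_pinecone_namespace(objective: str) -> str:
        # Drop every non-ASCII character; Pinecone namespaces are ASCII-only.
        return re.sub(re.compile('[^\x00-\x7F]+'), '', objective)

    print(to_pinecone_namespace("objétivo Nº 7"))  # 'objtivo N 7'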