Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Vector Store initial implementation #830

Merged
merged 27 commits into from
Nov 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions .github/CODEOWNERS
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
gpt_engineer/steps.py @pbharrin
gpt_engineer/chat_to_files.py @RareMojo
gpt_engineer/db.py @RareMojo
gpt_engineer/core/steps.py @pbharrin
gpt_engineer/data @TheoMcCabe
evals/ @UmerHA
.github/workflows/ @ATheorell
4 changes: 0 additions & 4 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,6 @@ archive
todo
scratchpad

# Ignore GPT Engineer files
TheoMcCabe marked this conversation as resolved.
Show resolved Hide resolved
projects
!projects/example

# Pyenv
.python-version

Expand Down
2 changes: 1 addition & 1 deletion docs/intro/harmony_db_ai_steps.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ Here is an example of a step function:
<br>

```python
def simple_gen(ai: AI, dbs: DBs):
def simple_gen(ai: AI, dbs: FileRepositories):
"""Generate code based on the main prompt."""
system = dbs.preprompts["generate"]
user = dbs.input["main_prompt"]
Expand Down
4 changes: 2 additions & 2 deletions evals/evals_existing_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
)

from gpt_engineer.core.chat_to_files import parse_chat
from gpt_engineer.core.db import DB
from gpt_engineer.data.file_repository import FileRepository

app = typer.Typer() # creates a CLI app

Expand All @@ -25,7 +25,7 @@ def single_evaluate(eval_ob: dict) -> list[bool]:
# load the known files into the project
# the files can be anywhere in the projects folder

workspace = DB(eval_ob["project_root"])
workspace = FileRepository(eval_ob["project_root"])
file_list_string = ""
code_base_abs = Path(os.getcwd()) / eval_ob["project_root"]

Expand Down
4 changes: 2 additions & 2 deletions evals/evals_new_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
load_evaluations_from_file,
)

from gpt_engineer.core.db import DB
from gpt_engineer.data.file_repository import FileRepository

app = typer.Typer() # creates a CLI app

Expand All @@ -20,7 +20,7 @@ def single_evaluate(eval_ob: dict) -> list[bool]:
"""Evaluates a single prompt for creating a new project."""
print(f"running evaluation: {eval_ob['name']}")

workspace = DB(eval_ob["project_root"])
workspace = FileRepository(eval_ob["project_root"])
base_abs = Path(os.getcwd())
code_base_abs = base_abs / eval_ob["project_root"]

Expand Down
2 changes: 1 addition & 1 deletion gpt_engineer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@
domain,
chat_to_files,
steps,
db,
)
from gpt_engineer.data import file_repository, code_vector_repository
8 changes: 5 additions & 3 deletions gpt_engineer/cli/collect.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from typing import List

from gpt_engineer.core import steps
from gpt_engineer.core.db import DBs
from gpt_engineer.data.file_repository import FileRepositories
from gpt_engineer.core.domain import Step
from gpt_engineer.cli.learning import Learning, extract_learning

Expand All @@ -39,7 +39,7 @@
from typing import List

from gpt_engineer.core import steps
from gpt_engineer.core.db import DBs
from gpt_engineer.data.file_repository import FileRepositories
from gpt_engineer.core.domain import Step
from gpt_engineer.cli.learning import Learning, extract_learning

Expand Down Expand Up @@ -71,7 +71,9 @@ def send_learning(learning: Learning):
)


def collect_learnings(model: str, temperature: float, steps: List[Step], dbs: DBs):
def collect_learnings(
model: str, temperature: float, steps: List[Step], dbs: FileRepositories
):
"""
Collect the learning data and send it to RudderStack for analysis.

Expand Down
4 changes: 2 additions & 2 deletions gpt_engineer/cli/file_selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
from pathlib import Path
from typing import List, Union

from gpt_engineer.core.db import DB, DBs
from gpt_engineer.data.file_repository import FileRepository

IGNORE_FOLDERS = {"site-packages", "node_modules", "venv"}
FILE_LIST_NAME = "file_list.txt"
Expand Down Expand Up @@ -321,7 +321,7 @@ def is_in_ignoring_extensions(path: Path) -> bool:
return is_hidden and is_pycache


def ask_for_files(metadata_db: DB, workspace_db: DB) -> None:
def ask_for_files(metadata_db: FileRepository, workspace_db: FileRepository) -> None:
"""
Ask the user to select the files to improve.
This can be done via the terminal, a GUI, or by reusing the previous selection.
Expand Down
10 changes: 7 additions & 3 deletions gpt_engineer/cli/learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
from dataclasses_json import dataclass_json
from termcolor import colored

from gpt_engineer.core.db import DB, DBs
from gpt_engineer.data.file_repository import FileRepository, FileRepositories
from gpt_engineer.core.domain import Step


Expand Down Expand Up @@ -187,7 +187,7 @@ def ask_collection_consent() -> bool:
return False


def logs_to_string(steps: List[Step], logs: DB) -> str:
def logs_to_string(steps: List[Step], logs: FileRepository) -> str:
"""
Convert the logs of the steps to a string.

Expand All @@ -211,7 +211,11 @@ def logs_to_string(steps: List[Step], logs: DB) -> str:


def extract_learning(
model: str, temperature: float, steps: List[Step], dbs: DBs, steps_file_hash
model: str,
temperature: float,
steps: List[Step],
dbs: FileRepositories,
steps_file_hash,
) -> Learning:
"""
Extract the learning data from the steps and databases.
Expand Down
48 changes: 32 additions & 16 deletions gpt_engineer/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,12 @@
import typer
from dotenv import load_dotenv

from gpt_engineer.data.file_repository import FileRepository, FileRepositories, archive
from gpt_engineer.core.ai import AI
from gpt_engineer.core.db import DB, DBs, archive
from gpt_engineer.core.steps import STEPS, Config as StepsConfig
from gpt_engineer.cli.collect import collect_learnings
from gpt_engineer.cli.learning import check_collection_consent
from gpt_engineer.data.code_vector_repository import CodeVectorRepository

app = typer.Typer() # creates a CLI app

Expand All @@ -51,7 +52,7 @@ def load_env_if_needed():
openai.api_key = os.getenv("OPENAI_API_KEY")


def load_prompt(dbs: DBs):
def load_prompt(dbs: FileRepositories):
if dbs.input.get("prompt"):
return dbs.input.get("prompt")

Expand Down Expand Up @@ -90,6 +91,12 @@ def main(
"-i",
help="Improve code from existing project.",
),
vector_improve_mode: bool = typer.Option(
False,
"--vector-improve",
"-vi",
help="Improve code from existing project using vector store.",
),
lite_mode: bool = typer.Option(
False,
"--lite",
Expand Down Expand Up @@ -124,6 +131,12 @@ def main(
), "Improve mode not compatible with other step configs"
steps_config = StepsConfig.IMPROVE_CODE

if vector_improve_mode:
assert (
steps_config == StepsConfig.DEFAULT
), "Vector improve mode not compatible with other step configs"
steps_config = StepsConfig.VECTOR_IMPROVE

load_env_if_needed()

ai = AI(
Expand All @@ -145,37 +158,40 @@ def main(
memory_path = project_metadata_path / "memory"
archive_path = project_metadata_path / "archive"

dbs = DBs(
memory=DB(memory_path),
logs=DB(memory_path / "logs"),
input=DB(input_path),
workspace=DB(workspace_path),
preprompts=DB(preprompts_path(use_custom_preprompts, input_path)),
archive=DB(archive_path),
project_metadata=DB(project_metadata_path),
fileRepositories = FileRepositories(
memory=FileRepository(memory_path),
logs=FileRepository(memory_path / "logs"),
input=FileRepository(input_path),
workspace=FileRepository(workspace_path),
preprompts=FileRepository(preprompts_path(use_custom_preprompts, input_path)),
archive=FileRepository(archive_path),
project_metadata=FileRepository(project_metadata_path),
)

codeVectorRepository = CodeVectorRepository()

if steps_config not in [
StepsConfig.EXECUTE_ONLY,
StepsConfig.USE_FEEDBACK,
StepsConfig.EVALUATE,
StepsConfig.IMPROVE_CODE,
StepsConfig.VECTOR_IMPROVE,
StepsConfig.SELF_HEAL,
]:
archive(dbs)
load_prompt(dbs)
archive(fileRepositories)
load_prompt(fileRepositories)

steps = STEPS[steps_config]
for step in steps:
messages = step(ai, dbs)
dbs.logs[step.__name__] = AI.serialize_messages(messages)
messages = step(ai, fileRepositories)
fileRepositories.logs[step.__name__] = AI.serialize_messages(messages)

print("Total api cost: $ ", ai.token_usage_log.usage_cost())

if check_collection_consent():
collect_learnings(model, temperature, steps, dbs)
collect_learnings(model, temperature, steps, fileRepositories)

dbs.logs["token_usage"] = ai.token_usage_log.format_log()
fileRepositories.logs["token_usage"] = ai.token_usage_log.format_log()


if __name__ == "__main__":
Expand Down
14 changes: 8 additions & 6 deletions gpt_engineer/core/chat_to_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
from dataclasses import dataclass
from typing import List, Tuple

from gpt_engineer.core.db import DB, DBs
from gpt_engineer.data.file_repository import FileRepository, FileRepositories
from gpt_engineer.cli.file_selector import FILE_LIST_NAME


Expand Down Expand Up @@ -87,7 +87,7 @@ def parse_chat(chat) -> List[Tuple[str, str]]:
return files


def to_files_and_memory(chat: str, dbs: DBs):
def to_files_and_memory(chat: str, dbs: FileRepositories):
"""
Save the chat to memory, then parse the chat into extracted files and save them to the workspace.

Expand All @@ -102,7 +102,7 @@ def to_files_and_memory(chat: str, dbs: DBs):
to_files(chat, dbs.workspace)


def to_files(chat: str, workspace: DB):
def to_files(chat: str, workspace: FileRepository):
"""
Parse the chat and add all extracted files to the workspace.

Expand All @@ -118,7 +118,9 @@ def to_files(chat: str, workspace: DB):
workspace[file_name] = file_content


def get_code_strings(workspace: DB, metadata_db: DB) -> dict[str, str]:
def get_code_strings(
workspace: FileRepository, metadata_db: FileRepository
) -> dict[str, str]:
"""
Read file_list.txt and return file names and their content.
Parameters
Expand Down Expand Up @@ -181,7 +183,7 @@ def format_file_to_input(file_name: str, file_content: str) -> str:
return file_str


def overwrite_files_with_edits(chat: str, dbs: DBs):
def overwrite_files_with_edits(chat: str, dbs: FileRepositories):
edits = parse_edits(chat)
apply_edits(edits, dbs.workspace)

Expand Down Expand Up @@ -234,7 +236,7 @@ def parse_all_edits(txt):
return parse_all_edits(llm_response)


def apply_edits(edits: List[Edit], workspace: DB):
def apply_edits(edits: List[Edit], workspace: FileRepository):
for edit in edits:
filename = edit.filename
if edit.before == "":
Expand Down
7 changes: 5 additions & 2 deletions gpt_engineer/core/domain.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
from typing import Callable, List, TypeVar

from gpt_engineer.core.ai import AI
from gpt_engineer.core.db import DBs
from gpt_engineer.data.file_repository import FileRepositories
from gpt_engineer.data.code_vector_repository import CodeVectorRepository

Step = TypeVar("Step", bound=Callable[[AI, DBs], List[dict]])
Step = TypeVar(
"Step", bound=Callable[[AI, FileRepositories, CodeVectorRepository], List[dict]]
)
Loading