Skip to content

Commit

Permalink
Merge pull request #830 from TheoMcCabe/vector-store-author-corrected
Browse files: browse the repository at this point in the history
Merging now as a starting point for context handling
  • Loading branch information
ATheorell authored Nov 3, 2023
2 parents 1408652 + 807b125 commit 92e4f0e
Show file tree
Hide file tree
Showing 34 changed files with 2,564 additions and 110 deletions.
5 changes: 2 additions & 3 deletions .github/CODEOWNERS
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
gpt_engineer/steps.py @pbharrin
gpt_engineer/chat_to_files.py @RareMojo
gpt_engineer/db.py @RareMojo
gpt_engineer/core/steps.py @pbharrin
gpt_engineer/data @TheoMcCabe
evals/ @UmerHA
.github/workflows/ @ATheorell
4 changes: 0 additions & 4 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,6 @@ archive
todo
scratchpad

# Ignore GPT Engineer files
projects
!projects/example

# Pyenv
.python-version

Expand Down
2 changes: 1 addition & 1 deletion docs/intro/harmony_db_ai_steps.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ Here is an example of a step function:
<br>

```python
def simple_gen(ai: AI, dbs: DBs):
def simple_gen(ai: AI, dbs: FileRepositories):
"""Generate code based on the main prompt."""
system = dbs.preprompts["generate"]
user = dbs.input["main_prompt"]
Expand Down
4 changes: 2 additions & 2 deletions evals/evals_existing_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
)

from gpt_engineer.core.chat_to_files import parse_chat
from gpt_engineer.core.db import DB
from gpt_engineer.data.file_repository import FileRepository

app = typer.Typer() # creates a CLI app

Expand All @@ -25,7 +25,7 @@ def single_evaluate(eval_ob: dict) -> list[bool]:
# load the known files into the project
# the files can be anywhere in the projects folder

workspace = DB(eval_ob["project_root"])
workspace = FileRepository(eval_ob["project_root"])
file_list_string = ""
code_base_abs = Path(os.getcwd()) / eval_ob["project_root"]

Expand Down
4 changes: 2 additions & 2 deletions evals/evals_new_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
load_evaluations_from_file,
)

from gpt_engineer.core.db import DB
from gpt_engineer.data.file_repository import FileRepository

app = typer.Typer() # creates a CLI app

Expand All @@ -20,7 +20,7 @@ def single_evaluate(eval_ob: dict) -> list[bool]:
"""Evaluates a single prompt for creating a new project."""
print(f"running evaluation: {eval_ob['name']}")

workspace = DB(eval_ob["project_root"])
workspace = FileRepository(eval_ob["project_root"])
base_abs = Path(os.getcwd())
code_base_abs = base_abs / eval_ob["project_root"]

Expand Down
2 changes: 1 addition & 1 deletion gpt_engineer/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@
domain,
chat_to_files,
steps,
db,
)
from gpt_engineer.data import file_repository, code_vector_repository
8 changes: 5 additions & 3 deletions gpt_engineer/cli/collect.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from typing import List

from gpt_engineer.core import steps
from gpt_engineer.core.db import DBs
from gpt_engineer.data.file_repository import FileRepositories
from gpt_engineer.core.domain import Step
from gpt_engineer.cli.learning import Learning, extract_learning

Expand All @@ -39,7 +39,7 @@
from typing import List

from gpt_engineer.core import steps
from gpt_engineer.core.db import DBs
from gpt_engineer.data.file_repository import FileRepositories
from gpt_engineer.core.domain import Step
from gpt_engineer.cli.learning import Learning, extract_learning

Expand Down Expand Up @@ -71,7 +71,9 @@ def send_learning(learning: Learning):
)


def collect_learnings(model: str, temperature: float, steps: List[Step], dbs: DBs):
def collect_learnings(
model: str, temperature: float, steps: List[Step], dbs: FileRepositories
):
"""
Collect the learning data and send it to RudderStack for analysis.
Expand Down
4 changes: 2 additions & 2 deletions gpt_engineer/cli/file_selector.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@
from pathlib import Path
from typing import List, Union

from gpt_engineer.core.db import DB, DBs
from gpt_engineer.data.file_repository import FileRepository

IGNORE_FOLDERS = {"site-packages", "node_modules", "venv"}
FILE_LIST_NAME = "file_list.txt"
Expand Down Expand Up @@ -321,7 +321,7 @@ def is_in_ignoring_extensions(path: Path) -> bool:
return is_hidden and is_pycache


def ask_for_files(metadata_db: DB, workspace_db: DB) -> None:
def ask_for_files(metadata_db: FileRepository, workspace_db: FileRepository) -> None:
"""
Ask the user to select the files to improve.
This can be done via the terminal, via a GUI, or by reusing the previous selection.
Expand Down
10 changes: 7 additions & 3 deletions gpt_engineer/cli/learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
from dataclasses_json import dataclass_json
from termcolor import colored

from gpt_engineer.core.db import DB, DBs
from gpt_engineer.data.file_repository import FileRepository, FileRepositories
from gpt_engineer.core.domain import Step


Expand Down Expand Up @@ -187,7 +187,7 @@ def ask_collection_consent() -> bool:
return False


def logs_to_string(steps: List[Step], logs: DB) -> str:
def logs_to_string(steps: List[Step], logs: FileRepository) -> str:
"""
Convert the logs of the steps to a string.
Expand All @@ -211,7 +211,11 @@ def logs_to_string(steps: List[Step], logs: DB) -> str:


def extract_learning(
model: str, temperature: float, steps: List[Step], dbs: DBs, steps_file_hash
model: str,
temperature: float,
steps: List[Step],
dbs: FileRepositories,
steps_file_hash,
) -> Learning:
"""
Extract the learning data from the steps and databases.
Expand Down
48 changes: 32 additions & 16 deletions gpt_engineer/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,12 @@
import typer
from dotenv import load_dotenv

from gpt_engineer.data.file_repository import FileRepository, FileRepositories, archive
from gpt_engineer.core.ai import AI
from gpt_engineer.core.db import DB, DBs, archive
from gpt_engineer.core.steps import STEPS, Config as StepsConfig
from gpt_engineer.cli.collect import collect_learnings
from gpt_engineer.cli.learning import check_collection_consent
from gpt_engineer.data.code_vector_repository import CodeVectorRepository

app = typer.Typer() # creates a CLI app

Expand All @@ -51,7 +52,7 @@ def load_env_if_needed():
openai.api_key = os.getenv("OPENAI_API_KEY")


def load_prompt(dbs: DBs):
def load_prompt(dbs: FileRepositories):
if dbs.input.get("prompt"):
return dbs.input.get("prompt")

Expand Down Expand Up @@ -90,6 +91,12 @@ def main(
"-i",
help="Improve code from existing project.",
),
vector_improve_mode: bool = typer.Option(
False,
"--vector-improve",
"-vi",
help="Improve code from existing project using vector store.",
),
lite_mode: bool = typer.Option(
False,
"--lite",
Expand Down Expand Up @@ -124,6 +131,12 @@ def main(
), "Improve mode not compatible with other step configs"
steps_config = StepsConfig.IMPROVE_CODE

if vector_improve_mode:
assert (
steps_config == StepsConfig.DEFAULT
), "Vector improve mode not compatible with other step configs"
steps_config = StepsConfig.VECTOR_IMPROVE

load_env_if_needed()

ai = AI(
Expand All @@ -145,37 +158,40 @@ def main(
memory_path = project_metadata_path / "memory"
archive_path = project_metadata_path / "archive"

dbs = DBs(
memory=DB(memory_path),
logs=DB(memory_path / "logs"),
input=DB(input_path),
workspace=DB(workspace_path),
preprompts=DB(preprompts_path(use_custom_preprompts, input_path)),
archive=DB(archive_path),
project_metadata=DB(project_metadata_path),
fileRepositories = FileRepositories(
memory=FileRepository(memory_path),
logs=FileRepository(memory_path / "logs"),
input=FileRepository(input_path),
workspace=FileRepository(workspace_path),
preprompts=FileRepository(preprompts_path(use_custom_preprompts, input_path)),
archive=FileRepository(archive_path),
project_metadata=FileRepository(project_metadata_path),
)

codeVectorRepository = CodeVectorRepository()

if steps_config not in [
StepsConfig.EXECUTE_ONLY,
StepsConfig.USE_FEEDBACK,
StepsConfig.EVALUATE,
StepsConfig.IMPROVE_CODE,
StepsConfig.VECTOR_IMPROVE,
StepsConfig.SELF_HEAL,
]:
archive(dbs)
load_prompt(dbs)
archive(fileRepositories)
load_prompt(fileRepositories)

steps = STEPS[steps_config]
for step in steps:
messages = step(ai, dbs)
dbs.logs[step.__name__] = AI.serialize_messages(messages)
messages = step(ai, fileRepositories)
fileRepositories.logs[step.__name__] = AI.serialize_messages(messages)

print("Total api cost: $ ", ai.token_usage_log.usage_cost())

if check_collection_consent():
collect_learnings(model, temperature, steps, dbs)
collect_learnings(model, temperature, steps, fileRepositories)

dbs.logs["token_usage"] = ai.token_usage_log.format_log()
fileRepositories.logs["token_usage"] = ai.token_usage_log.format_log()


if __name__ == "__main__":
Expand Down
14 changes: 8 additions & 6 deletions gpt_engineer/core/chat_to_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
from dataclasses import dataclass
from typing import List, Tuple

from gpt_engineer.core.db import DB, DBs
from gpt_engineer.data.file_repository import FileRepository, FileRepositories
from gpt_engineer.cli.file_selector import FILE_LIST_NAME


Expand Down Expand Up @@ -87,7 +87,7 @@ def parse_chat(chat) -> List[Tuple[str, str]]:
return files


def to_files_and_memory(chat: str, dbs: DBs):
def to_files_and_memory(chat: str, dbs: FileRepositories):
"""
Save the chat to memory, then parse the chat into extracted files and save them to the workspace.
Expand All @@ -102,7 +102,7 @@ def to_files_and_memory(chat: str, dbs: DBs):
to_files(chat, dbs.workspace)


def to_files(chat: str, workspace: DB):
def to_files(chat: str, workspace: FileRepository):
"""
Parse the chat and add all extracted files to the workspace.
Expand All @@ -118,7 +118,9 @@ def to_files(chat: str, workspace: DB):
workspace[file_name] = file_content


def get_code_strings(workspace: DB, metadata_db: DB) -> dict[str, str]:
def get_code_strings(
workspace: FileRepository, metadata_db: FileRepository
) -> dict[str, str]:
"""
Read file_list.txt and return file names and their content.
Parameters
Expand Down Expand Up @@ -181,7 +183,7 @@ def format_file_to_input(file_name: str, file_content: str) -> str:
return file_str


def overwrite_files_with_edits(chat: str, dbs: DBs):
def overwrite_files_with_edits(chat: str, dbs: FileRepositories):
edits = parse_edits(chat)
apply_edits(edits, dbs.workspace)

Expand Down Expand Up @@ -234,7 +236,7 @@ def parse_all_edits(txt):
return parse_all_edits(llm_response)


def apply_edits(edits: List[Edit], workspace: DB):
def apply_edits(edits: List[Edit], workspace: FileRepository):
for edit in edits:
filename = edit.filename
if edit.before == "":
Expand Down
7 changes: 5 additions & 2 deletions gpt_engineer/core/domain.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@
from typing import Callable, List, TypeVar

from gpt_engineer.core.ai import AI
from gpt_engineer.core.db import DBs
from gpt_engineer.data.file_repository import FileRepositories
from gpt_engineer.data.code_vector_repository import CodeVectorRepository

Step = TypeVar("Step", bound=Callable[[AI, DBs], List[dict]])
Step = TypeVar(
"Step", bound=Callable[[AI, FileRepositories, CodeVectorRepository], List[dict]]
)
Loading

0 comments on commit 92e4f0e

Please sign in to comment.