Refactor #846

Merged: 4 commits, Nov 12, 2023
gpt_engineer/applications/cli/cli_agent.py (2 changes: 1 addition & 1 deletion)

@@ -78,5 +78,5 @@ def init(self, prompt: str) -> Code:
         human_review(self.memory)
         return code

-    def improve(self, prompt: str) -> Code:
+    def improve(self, prompt: str, code) -> Code:
         pass
gpt_engineer/core/base_agent.py (2 changes: 1 addition & 1 deletion)

@@ -50,5 +50,5 @@ def init(self, prompt: str) -> Code:
         pass

     @abstractmethod
-    def improve(self, prompt: str) -> Code:
+    def improve(self, prompt: str, code: Code) -> Code:
         pass
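For context, here is a minimal, self-contained sketch (not part of the PR) of an agent written against the new two-argument `improve`. The `Code` stand-in is a plain dict subclass, which matches how this diff treats it elsewhere (`code[filename]`, `code.items()`, `filename in code`); `EchoAgent` and its behaviour are purely illustrative.

```python
# Illustrative only: a toy agent against the new two-argument signature.
from abc import ABC, abstractmethod


class Code(dict):
    """Stand-in for gpt_engineer.core.code.Code (assumed to be dict-like)."""


class BaseAgent(ABC):
    """Stand-in mirroring the interface after this PR."""

    @abstractmethod
    def init(self, prompt: str) -> Code:
        pass

    @abstractmethod
    def improve(self, prompt: str, code: Code) -> Code:
        pass


class EchoAgent(BaseAgent):
    """Toy implementation that appends the prompt as a comment to every file."""

    def init(self, prompt: str) -> Code:
        return Code({"main.py": f"# generated for: {prompt}\n"})

    def improve(self, prompt: str, code: Code) -> Code:
        # The code to improve is now passed in explicitly instead of being
        # read from disk inside the agent.
        for filename, content in code.items():
            code[filename] = content + f"\n# improved per: {prompt}\n"
        return code


if __name__ == "__main__":
    agent = EchoAgent()
    code = agent.init("hello world script")
    code = agent.improve("add a docstring", code)
    print(code["main.py"])
```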
gpt_engineer/core/chat_to_files.py (81 changes: 32 additions & 49 deletions)

@@ -31,14 +31,15 @@
 import logging

 from dataclasses import dataclass
-from typing import List, Tuple
+from typing import List, Tuple, Union
+from pathlib import Path

 from gpt_engineer.core.default.on_disk_repository import (
     OnDiskRepository,
-    FileRepositories,
+    # FileRepositories,
 )
-from gpt_engineer.applications.cli.file_selector import FILE_LIST_NAME

+from gpt_engineer.core.code import Code

 logger = logging.getLogger(__name__)

@@ -90,21 +91,6 @@ def parse_chat(chat) -> List[Tuple[str, str]]:
     return files


-def to_files_and_memory(chat: str, dbs: FileRepositories):
-    """
-    Save chat to memory, and parse chat to extracted file and save them to the workspace.
-
-    Parameters
-    ----------
-    chat : str
-        The chat to parse.
-    dbs : DBs
-        The databases that include the memory and workspace database
-    """
-    dbs.memory["all_output.txt"] = chat
-    to_files(chat, dbs.workspace)
-
-
 def to_files(chat: str, workspace: OnDiskRepository):
     """
     Parse the chat and add all extracted files to the workspace.
@@ -186,9 +172,9 @@ def format_file_to_input(file_name: str, file_content: str) -> str:
     return file_str


-def overwrite_files_with_edits(chat: str, dbs: FileRepositories):
+def overwrite_files_with_edits(chat: str, code: Code):
     edits = parse_edits(chat)
-    apply_edits(edits, dbs.workspace)
+    apply_edits(edits, code)


 @dataclass
@@ -198,7 +184,7 @@ class Edit:
     after: str


-def parse_edits(llm_response):
+def parse_edits(chat: str):
     def parse_one_edit(lines):
         HEAD = "<<<<<<< HEAD"
         DIVIDER = "======="
@@ -216,62 +202,59 @@ def parse_one_edit(lines):

         return Edit(filename, before, after)

-    def parse_all_edits(txt):
-        edits = []
-        current_edit = []
-        in_fence = False
-
-        for line in txt.split("\n"):
-            if line.startswith("```") and in_fence:
-                edits.append(parse_one_edit(current_edit))
-                current_edit = []
-                in_fence = False
-                continue
-            elif line.startswith("```") and not in_fence:
-                in_fence = True
-                continue
-
-            if in_fence:
-                current_edit.append(line)
-
-        return edits
-
-    return parse_all_edits(llm_response)
+    edits = []
+    current_edit = []
+    in_fence = False
+
+    for line in chat.split("\n"):
+        if line.startswith("```") and in_fence:
+            edits.append(parse_one_edit(current_edit))
+            current_edit = []
+            in_fence = False
+            continue
+        elif line.startswith("```") and not in_fence:
+            in_fence = True
+            continue
+
+        if in_fence:
+            current_edit.append(line)
+
+    return edits


-def apply_edits(edits: List[Edit], workspace: OnDiskRepository):
+def apply_edits(edits: List[Edit], code: Code):
     for edit in edits:
         filename = edit.filename
         if edit.before == "":
-            if workspace.get(filename) is not None:
-                logger.warn(
+            if filename in code:
+                logger.warning(
                     f"The edit to be applied wants to create a new file `{filename}`, but that already exists. The file will be overwritten. See `.gpteng/memory` for previous version."
                 )
-            workspace[filename] = edit.after # new file
+            code[filename] = edit.after # new file
         else:
-            occurrences_cnt = workspace[filename].count(edit.before)
+            occurrences_cnt = code[filename].count(edit.before)
             if occurrences_cnt == 0:
-                logger.warn(
+                logger.warning(
                     f"While applying an edit to `{filename}`, the code block to be replaced was not found. No instances will be replaced."
                 )
             if occurrences_cnt > 1:
-                logger.warn(
+                logger.warning(
                     f"While applying an edit to `{filename}`, the code block to be replaced was found multiple times. All instances will be replaced."
                 )
-            workspace[filename] = workspace[filename].replace(
+            code[filename] = code[filename].replace(
                 edit.before, edit.after
             ) # existing file


-def _get_all_files_in_dir(directory):
+def _get_all_files_in_dir(directory: Union[str, Path]):
     for root, dirs, files in os.walk(directory):
         for file in files:
             yield os.path.join(root, file)
         for dir in dirs:
             yield from _get_all_files_in_dir(os.path.join(root, dir))


-def _open_file(file_path) -> str:
+def _open_file(file_path: Union[str, Path]) -> str:
     try:
         with open(file_path, "r", encoding="utf-8") as f:
             return f.read()
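To make the new `Edit`/`Code` plumbing concrete, here is a small usage sketch (not part of the PR). `Edit` and `apply_edits` are imported as defined in this file; a plain dict stands in for the `Code` object, since `apply_edits` only relies on mapping-style access (`filename in code`, `code[filename]`) and `str.count`/`str.replace` on the values. The file names and contents are invented for illustration, and running it assumes a checkout of this branch is importable.

```python
# Usage sketch: applying hand-built edits to an in-memory "code base".
from gpt_engineer.core.chat_to_files import Edit, apply_edits

# Plain dict standing in for Code: file path -> file contents.
code = {
    "calculator.py": "def add(a, b):\n    return a + b\n",
}

edits = [
    # Replace an existing block: `before` must match the current text exactly,
    # otherwise apply_edits logs a warning and leaves the file untouched.
    Edit(
        filename="calculator.py",
        before="def add(a, b):\n    return a + b\n",
        after="def add(a: int, b: int) -> int:\n    return a + b\n",
    ),
    # An empty `before` means "create this file" (it is overwritten with a
    # warning if it already exists).
    Edit(filename="README.md", before="", after="# Calculator\n"),
]

apply_edits(edits, code)  # mutates `code` in place
print(code["calculator.py"])
print(code["README.md"])
```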
gpt_engineer/core/default/lean_agent.py (2 changes: 1 addition & 1 deletion)

@@ -76,5 +76,5 @@ def init(self, prompt: str) -> Code:
         execute_entrypoint(self.execution_env, code)
         return code

-    def improve(self, prompt: str) -> Code:
+    def improve(self, prompt: str, code: Code) -> Code:
         pass
gpt_engineer/core/default/steps.py (41 changes: 31 additions & 10 deletions)

@@ -1,6 +1,6 @@
 from gpt_engineer.core.code import Code
 from gpt_engineer.core.ai import AI
-from gpt_engineer.core.chat_to_files import parse_chat
+from gpt_engineer.core.chat_to_files import parse_chat, overwrite_files_with_edits, format_file_to_input
 from gpt_engineer.core.default.paths import (
     ENTRYPOINT_FILE,
     CODE_GEN_LOG_FILE,
@@ -205,7 +205,28 @@ def execute_entrypoint(execution_env: BaseExecutionEnv, code: Code) -> None:
     execution_env.execute_program(code)


-def improve(ai: AI, prompt: str) -> Code:
+def setup_sys_prompt_existing_code(db: OnDiskRepository) -> str:
+    """
+    Constructs a system prompt for the AI focused on improving an existing codebase.
+
+    This function sets up the system prompts for the AI, guiding it on how to
+    work with and improve an existing code base. The generated prompt consists
+    of the "improve" instruction (with dynamic format replacements) and the coding
+    "philosophy" taken from the given DBs object.
+
+    Parameters:
+    - dbs (DBs): The database object containing pre-defined prompts and instructions.
+
+    Returns:
+    - str: The constructed system prompt focused on existing code improvement for the AI.
+    """
+    return (
+        db.preprompts["improve"].replace("FILE_FORMAT", db.preprompts["file_format"])
+        + "\nUseful to know:\n"
+        + db.preprompts["philosophy"]
+    )
+
+def improve(ai: AI, prompt: str, code: Code) -> Code:
     """
     Process and improve the code from a specified set of existing files based on a user prompt.

@@ -236,21 +257,21 @@ def improve(ai: AI, prompt: str) -> Code:
     to sent the formatted prompt to the LLM.
     """

-    files_info = get_code_strings(
-        dbs.workspace, dbs.project_metadata
-    ) # this has file names relative to the workspace path
-
+    # files_info = get_code_strings(
+    #     dbs.workspace, dbs.project_metadata
+    # ) # this has file names relative to the workspace path
+    db = OnDiskRepository(PREPROMPTS_PATH)
     messages = [
-        SystemMessage(content=setup_sys_prompt_existing_code(dbs)),
+        SystemMessage(content=setup_sys_prompt_existing_code(db)),
     ]
     # Add files as input
-    for file_name, file_str in files_info.items():
+    for file_name, file_str in code.items():
         code_input = format_file_to_input(file_name, file_str)
         messages.append(HumanMessage(content=f"{code_input}"))

-    messages.append(HumanMessage(content=f"Request: {dbs.input['prompt']}"))
+    messages.append(HumanMessage(content=f"Request: {prompt}"))

     messages = ai.next(messages, step_name=curr_fn())

-    overwrite_files_with_edits(messages[-1].content.strip(), dbs)
+    overwrite_files_with_edits(messages[-1].content.strip(), code)
     return messages
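For orientation, a caller-side sketch (not part of this PR) of the reworked `improve` step. The `improve(ai, prompt, code)` signature, the message assembly, and the in-place application of edits are taken from the diff above; the `AI(...)` and `Code(...)` constructor calls are assumptions, since their definitions are not visible here, and actually running this needs valid model credentials.

```python
# Caller-side sketch of the reworked improve() step.
from gpt_engineer.core.ai import AI
from gpt_engineer.core.code import Code
from gpt_engineer.core.default.steps import improve

ai = AI(model_name="gpt-4")                 # assumed constructor signature
code = Code({"main.py": "print('hi')\n"})   # assumed dict-style constructor

# improve() now takes the code to work on explicitly instead of reading it
# from on-disk DBs: it formats each file into the prompt, sends a "Request:"
# message with the user prompt, and applies the returned edit blocks back
# onto `code` in place via overwrite_files_with_edits().
messages = improve(ai, "Add a --name CLI argument", code)

print(code["main.py"])  # mutated in place with the model's edits
# `messages` is the chat transcript returned by the step.
```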
gpt_engineer/legacy/steps.py (2 changes: 1 addition & 1 deletion)

@@ -63,7 +63,7 @@
     format_file_to_input,
     get_code_strings,
     overwrite_files_with_edits,
-    to_files_and_memory,
+    # to_files_and_memory,
 )
 from gpt_engineer.core.default.on_disk_repository import FileRepositories
 from gpt_engineer.applications.cli.file_selector import FILE_LIST_NAME, ask_for_files
projects/example-improve/run.sh (2 changes: 1 addition & 1 deletion; mode 100644 → 100755)

@@ -4,4 +4,4 @@ source venv/bin/activate
 pip install -r requirements.txt

 # b) Run all necessary parts of the codebase
-python src/main.py
+python main.py
Remaining changes: several test files renamed without changes, plus empty files added at tests/core/__init__.py and tests/legacy_steps/__init__.py.