diff --git a/gpt_engineer/applications/cli/cli_agent.py b/gpt_engineer/applications/cli/cli_agent.py index 663a892570..31673412de 100644 --- a/gpt_engineer/applications/cli/cli_agent.py +++ b/gpt_engineer/applications/cli/cli_agent.py @@ -78,5 +78,5 @@ def init(self, prompt: str) -> Code: human_review(self.memory) return code - def improve(self, prompt: str) -> Code: + def improve(self, prompt: str, code) -> Code: pass diff --git a/gpt_engineer/core/base_agent.py b/gpt_engineer/core/base_agent.py index 2e931acfaf..2c637e925a 100644 --- a/gpt_engineer/core/base_agent.py +++ b/gpt_engineer/core/base_agent.py @@ -50,5 +50,5 @@ def init(self, prompt: str) -> Code: pass @abstractmethod - def improve(self, prompt: str) -> Code: + def improve(self, prompt: str, code: Code) -> Code: pass diff --git a/gpt_engineer/core/chat_to_files.py b/gpt_engineer/core/chat_to_files.py index 2d7ccfcc04..e581943d8d 100644 --- a/gpt_engineer/core/chat_to_files.py +++ b/gpt_engineer/core/chat_to_files.py @@ -31,14 +31,15 @@ import logging from dataclasses import dataclass -from typing import List, Tuple +from typing import List, Tuple, Union +from pathlib import Path from gpt_engineer.core.default.on_disk_repository import ( OnDiskRepository, - FileRepositories, + # FileRepositories, ) from gpt_engineer.applications.cli.file_selector import FILE_LIST_NAME - +from gpt_engineer.core.code import Code logger = logging.getLogger(__name__) @@ -90,21 +91,6 @@ def parse_chat(chat) -> List[Tuple[str, str]]: return files -def to_files_and_memory(chat: str, dbs: FileRepositories): - """ - Save chat to memory, and parse chat to extracted file and save them to the workspace. - - Parameters - ---------- - chat : str - The chat to parse. 
- dbs : DBs - The databases that include the memory and workspace database - """ - dbs.memory["all_output.txt"] = chat - to_files(chat, dbs.workspace) - - def to_files(chat: str, workspace: OnDiskRepository): """ Parse the chat and add all extracted files to the workspace. @@ -186,9 +172,9 @@ def format_file_to_input(file_name: str, file_content: str) -> str: return file_str -def overwrite_files_with_edits(chat: str, dbs: FileRepositories): +def overwrite_files_with_edits(chat: str, code: Code): edits = parse_edits(chat) - apply_edits(edits, dbs.workspace) + apply_edits(edits, code) @dataclass @@ -198,7 +184,7 @@ class Edit: after: str -def parse_edits(llm_response): +def parse_edits(chat: str): def parse_one_edit(lines): HEAD = "<<<<<<< HEAD" DIVIDER = "=======" @@ -216,54 +202,51 @@ def parse_one_edit(lines): return Edit(filename, before, after) - def parse_all_edits(txt): - edits = [] - current_edit = [] - in_fence = False - - for line in txt.split("\n"): - if line.startswith("```") and in_fence: - edits.append(parse_one_edit(current_edit)) - current_edit = [] - in_fence = False - continue - elif line.startswith("```") and not in_fence: - in_fence = True - continue + edits = [] + current_edit = [] + in_fence = False - if in_fence: - current_edit.append(line) + for line in chat.split("\n"): + if line.startswith("```") and in_fence: + edits.append(parse_one_edit(current_edit)) + current_edit = [] + in_fence = False + continue + elif line.startswith("```") and not in_fence: + in_fence = True + continue - return edits + if in_fence: + current_edit.append(line) - return parse_all_edits(llm_response) + return edits -def apply_edits(edits: List[Edit], workspace: OnDiskRepository): +def apply_edits(edits: List[Edit], code: Code): for edit in edits: filename = edit.filename if edit.before == "": - if workspace.get(filename) is not None: - logger.warn( + if filename in code: + logger.warning( f"The edit to be applied wants to create a new file `{filename}`, but that 
already exists. The file will be overwritten. See `.gpteng/memory` for previous version." ) - workspace[filename] = edit.after # new file + code[filename] = edit.after # new file else: - occurrences_cnt = workspace[filename].count(edit.before) + occurrences_cnt = code[filename].count(edit.before) if occurrences_cnt == 0: - logger.warn( + logger.warning( f"While applying an edit to `{filename}`, the code block to be replaced was not found. No instances will be replaced." ) if occurrences_cnt > 1: - logger.warn( + logger.warning( f"While applying an edit to `{filename}`, the code block to be replaced was found multiple times. All instances will be replaced." ) - workspace[filename] = workspace[filename].replace( + code[filename] = code[filename].replace( edit.before, edit.after ) # existing file -def _get_all_files_in_dir(directory): +def _get_all_files_in_dir(directory: Union[str, Path]): for root, dirs, files in os.walk(directory): for file in files: yield os.path.join(root, file) @@ -271,7 +254,7 @@ def _get_all_files_in_dir(directory): yield from _get_all_files_in_dir(os.path.join(root, dir)) -def _open_file(file_path) -> str: +def _open_file(file_path: Union[str, Path]) -> str: try: with open(file_path, "r", encoding="utf-8") as f: return f.read() diff --git a/gpt_engineer/core/default/lean_agent.py b/gpt_engineer/core/default/lean_agent.py index cf7a974895..adef734383 100644 --- a/gpt_engineer/core/default/lean_agent.py +++ b/gpt_engineer/core/default/lean_agent.py @@ -76,5 +76,5 @@ def init(self, prompt: str) -> Code: execute_entrypoint(self.execution_env, code) return code - def improve(self, prompt: str) -> Code: + def improve(self, prompt: str, code: Code) -> Code: pass diff --git a/gpt_engineer/core/default/steps.py b/gpt_engineer/core/default/steps.py index f91147b437..ceedc5e3be 100644 --- a/gpt_engineer/core/default/steps.py +++ b/gpt_engineer/core/default/steps.py @@ -1,6 +1,6 @@ from gpt_engineer.core.code import Code from gpt_engineer.core.ai import 
AI -from gpt_engineer.core.chat_to_files import parse_chat +from gpt_engineer.core.chat_to_files import parse_chat, overwrite_files_with_edits, format_file_to_input from gpt_engineer.core.default.paths import ( ENTRYPOINT_FILE, CODE_GEN_LOG_FILE, @@ -205,7 +205,28 @@ def execute_entrypoint(execution_env: BaseExecutionEnv, code: Code) -> None: execution_env.execute_program(code) -def improve(ai: AI, prompt: str) -> Code: +def setup_sys_prompt_existing_code(db: OnDiskRepository) -> str: + """ + Constructs a system prompt for the AI focused on improving an existing codebase. + + This function sets up the system prompts for the AI, guiding it on how to + work with and improve an existing code base. The generated prompt consists + of the "improve" instruction (with dynamic format replacements) and the coding + "philosophy" taken from the given `db` repository object. + + Parameters: + - db (OnDiskRepository): The repository object containing pre-defined prompts and instructions. + + Returns: + - str: The constructed system prompt focused on existing code improvement for the AI. + """ + return ( + db.preprompts["improve"].replace("FILE_FORMAT", db.preprompts["file_format"]) + + "\nUseful to know:\n" + + db.preprompts["philosophy"] + ) + +def improve(ai: AI, prompt: str, code: Code) -> Code: """ Process and improve the code from a specified set of existing files based on a user prompt. @@ -236,21 +257,21 @@ def improve(ai: AI, prompt: str) -> Code: to sent the formatted prompt to the LLM. 
""" - files_info = get_code_strings( - dbs.workspace, dbs.project_metadata - ) # this has file names relative to the workspace path - + # files_info = get_code_strings( + # dbs.workspace, dbs.project_metadata + # ) # this has file names relative to the workspace path + db = OnDiskRepository(PREPROMPTS_PATH) messages = [ - SystemMessage(content=setup_sys_prompt_existing_code(dbs)), + SystemMessage(content=setup_sys_prompt_existing_code(db)), ] # Add files as input - for file_name, file_str in files_info.items(): + for file_name, file_str in code.items(): code_input = format_file_to_input(file_name, file_str) messages.append(HumanMessage(content=f"{code_input}")) - messages.append(HumanMessage(content=f"Request: {dbs.input['prompt']}")) + messages.append(HumanMessage(content=f"Request: {prompt}")) messages = ai.next(messages, step_name=curr_fn()) - overwrite_files_with_edits(messages[-1].content.strip(), dbs) + overwrite_files_with_edits(messages[-1].content.strip(), code) return messages diff --git a/gpt_engineer/legacy/steps.py b/gpt_engineer/legacy/steps.py index cd251d85ae..c3d63df34f 100644 --- a/gpt_engineer/legacy/steps.py +++ b/gpt_engineer/legacy/steps.py @@ -63,7 +63,7 @@ format_file_to_input, get_code_strings, overwrite_files_with_edits, - to_files_and_memory, + # to_files_and_memory, ) from gpt_engineer.core.default.on_disk_repository import FileRepositories from gpt_engineer.applications.cli.file_selector import FILE_LIST_NAME, ask_for_files diff --git a/projects/example-improve/run.sh b/projects/example-improve/run.sh old mode 100644 new mode 100755 index 9e7282fa34..75d32313fe --- a/projects/example-improve/run.sh +++ b/projects/example-improve/run.sh @@ -4,4 +4,4 @@ source venv/bin/activate pip install -r requirements.txt # b) Run all necessary parts of the codebase -python src/main.py +python main.py diff --git a/tests/steps/__init__.py b/tests/applications/__init__.py similarity index 100% rename from tests/steps/__init__.py rename to 
tests/applications/__init__.py diff --git a/tests/applications/cli/__init__.py b/tests/applications/cli/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/test_collect.py b/tests/applications/cli/test_collect.py similarity index 100% rename from tests/test_collect.py rename to tests/applications/cli/test_collect.py diff --git a/tests/test_collection_consent.py b/tests/applications/cli/test_collection_consent.py similarity index 100% rename from tests/test_collection_consent.py rename to tests/applications/cli/test_collection_consent.py diff --git a/tests/core/__init__.py b/tests/core/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/test_ai.py b/tests/core/test_ai.py similarity index 100% rename from tests/test_ai.py rename to tests/core/test_ai.py diff --git a/tests/test_chat_to_files.py b/tests/core/test_chat_to_files.py similarity index 100% rename from tests/test_chat_to_files.py rename to tests/core/test_chat_to_files.py diff --git a/tests/test_token_usage.py b/tests/core/test_token_usage.py similarity index 100% rename from tests/test_token_usage.py rename to tests/core/test_token_usage.py diff --git a/tests/legacy_steps/__init__.py b/tests/legacy_steps/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/steps/test_archive.py b/tests/legacy_steps/test_archive.py similarity index 100% rename from tests/steps/test_archive.py rename to tests/legacy_steps/test_archive.py diff --git a/tests/data/example_snake_files.py b/tests/tools/example_snake_files.py similarity index 100% rename from tests/data/example_snake_files.py rename to tests/tools/example_snake_files.py diff --git a/tests/data/test_code_vector_repository.py b/tests/tools/test_code_vector_repository.py similarity index 100% rename from tests/data/test_code_vector_repository.py rename to tests/tools/test_code_vector_repository.py diff --git a/tests/data/test_file_repository.py b/tests/tools/test_file_repository.py 
similarity index 100% rename from tests/data/test_file_repository.py rename to tests/tools/test_file_repository.py