def get_code_strings(input) -> dict[str, str]:
    """
    Read the files listed in ``file_list.txt`` and return their contents.

    Args:
        input: Mapping-like store indexed as ``input["file_list.txt"]``; that
            entry holds one file path per line. The parameter name shadows
            the builtin but is kept so existing callers keep working.

    Returns:
        dict[str, str]: Base file name -> file content. Files whose content
        is empty are left out. If two listed paths share a base name, the
        later one overwrites the earlier one.

    Raises:
        OSError: If a listed path cannot be opened.
    """
    files_dict = {}
    for raw_path in input["file_list.txt"].splitlines():
        file_path = raw_path.strip()
        if not file_path:
            # An empty list or a blank line must not reach open(""),
            # which would raise FileNotFoundError.
            continue
        with open(file_path, "r") as file:
            file_data = file.read()
        if file_data:
            files_dict[os.path.basename(file_path)] = file_data
    return files_dict
@classmethod
def make_tree(cls, root: Union[str, Path], parent=None, is_last=False, criteria=None):
    """
    Lazily walk a directory and yield one DisplayablePath per entry.

    The root node is yielded first, followed by its children sorted by their
    lower-cased path string. Directories are walked recursively, except those
    whose name is in IGNORE_FOLDERS: these are yielded as a single node and
    not descended into.

    Args:
        root: Directory to start from.
        parent: DisplayablePath of the parent node, or None for the top node.
        is_last: Whether ``root`` is the last child of ``parent``.
        criteria: Callable taking a Path and returning True to keep it.
            Falls back to ``cls._default_criteria`` when falsy.

    Yields:
        DisplayablePath: The nodes of the tree, parents before children.
    """
    root_path = Path(str(root))
    keep = criteria if criteria else cls._default_criteria

    node = cls(root_path, parent, is_last)
    yield node

    entries = [entry for entry in root_path.iterdir() if keep(entry)]
    entries.sort(key=lambda entry: str(entry).lower())
    final_position = len(entries)
    for position, entry in enumerate(entries, start=1):
        entry_is_last = position == final_position
        descend = entry.is_dir() and entry.name not in IGNORE_FOLDERS
        if not descend:
            # Plain files and ignored folders become leaf nodes
            yield cls(entry, node, entry_is_last)
            continue
        yield from cls.make_tree(
            entry, parent=node, is_last=entry_is_last, criteria=keep
        )
class TerminalFileSelector:
    """
    Terminal-based file picker.

    ``display`` prints the directory tree with a number in front of every
    file; ``ask_for_selection`` then reads those numbers from the user and
    maps them back to paths.
    """

    def __init__(self, root_folder_path: Path) -> None:
        """
        Prepare the tree of ``root_folder_path`` for display.

        Args:
            root_folder_path: Directory whose content is offered for selection.
        """
        self.number_of_selectable_items = 0
        self.selectable_file_paths: dict[int, str] = {}
        self.file_path_list: list = []
        # make_tree is a generator, so the tree is walked only when displayed
        self.db_paths = DisplayablePath.make_tree(
            root_folder_path, parent=None, criteria=is_in_ignoring_extensions
        )

    def display(self):
        """
        Print the tree and record which number belongs to which file.

        Only files get a number; directories are printed for context.
        ``self.db_paths`` is a generator, so it is consumed by the first call.
        """
        count = 0
        file_path_enumeration = {}
        file_path_list = []
        for path in self.db_paths:
            n_digits = len(str(count))
            # Numbers of up to three digits are padded to line up; wider
            # numbers get no padding.
            spaces_str = " " * max(3 - n_digits, 0)
            if path.path.is_dir():
                # Selecting whole directories is not supported yet.
                # The two spaces stand in for the ". " printed after a file
                # number so directory rows stay aligned with file rows.
                number_space = " " * n_digits
                print(f"{number_space}  {spaces_str}{path.displayable()}")
                continue
            print(f"{count}. {spaces_str}{path.displayable()}")
            file_path_enumeration[count] = path.path
            file_path_list.append(path.path)
            count += 1

        self.number_of_selectable_items = count
        self.file_path_list = file_path_list
        self.selectable_file_paths = file_path_enumeration

    def ask_for_selection(self) -> List[str]:
        """
        Ask the user which of the displayed files to use.

        Accepts ``all`` or numbers and ``start-end`` ranges separated by
        commas and/or whitespace, e.g. ``1,2,3-5 7``. The same file may be
        listed more than once; a range whose end is below its start selects
        nothing.

        Returns:
            List[str]: The selected paths as strings, in the order entered.

        Raises:
            SystemExit: With code 1 when the input is empty, contains
                anything that is not a number or range, or names a number
                that was not displayed.
        """
        user_input = input(
            "\nSelect files by entering the numbers separated by commas/spaces or "
            + "specify range with a dash. "
            + "Example: 1,2,3-5,7,9,13-15,18,20 (enter 'all' to select everything)"
            + "\n\nSelect files:"
        ).strip()

        if user_input.lower() == "all":
            return [str(path) for path in self.file_path_list]

        selected_paths: List[str] = []
        # Split on any mix of commas and whitespace so "1, 2" and "1 2" work
        tokens = [token for token in re.split(r"[,\s]+", user_input) if token]
        is_valid = bool(tokens)
        for token in tokens:
            # The whole token must be "N" or "N-M", not merely start with it
            parsed = re.fullmatch(r"(\d+)(?:-(\d+))?", token)
            if parsed is None:
                is_valid = False
                break
            start = int(parsed.group(1))
            end = start if parsed.group(2) is None else int(parsed.group(2))
            try:
                selected_paths.extend(
                    str(self.selectable_file_paths[num])
                    for num in range(start, end + 1)
                )
            except KeyError:
                # The number was never offered by display()
                is_valid = False
                break

        if not is_valid:
            print("Please use a valid number/series of numbers.\n")
            sys.exit(1)

        return selected_paths
+ + Returns: + dict[str, str]: Dictionary where key = file name and value = file path + """ + use_last_string = "" + is_valid_selection = False + can_use_last = False + if "file_list.txt" in db_input: + can_use_last = True + use_last_string = ( + "3. Use previous file list (available at " + + f"{os.path.join(db_input.path, 'file_list.txt')})\n" + ) + selection_number = 3 + else: + selection_number = 1 + selection_str = f"""How do you want to select the files? + +1. Use Command-Line. +2. Use File explorer. +{use_last_string if len(use_last_string) > 1 else ""} +Select option and press Enter (default={selection_number}): """ + file_path_list = [] + selected_number_str = input(selection_str) + if selected_number_str: + try: + selection_number = int(selected_number_str) + except ValueError: + print("Invalid number. Select a number from the list above.\n") + sys.exit(1) + if selection_number == 1: + # Open terminal selection + file_path_list = terminal_file_selector() + is_valid_selection = True + elif selection_number == 2: + # Open GUI selection + file_path_list = gui_file_selector() + is_valid_selection = True + else: + if can_use_last and selection_number == 3: + # Use previous file list + is_valid_selection = True + if not is_valid_selection: + print("Invalid number. 
def gui_file_selector() -> List[str]:
    """
    Display a tkinter file selection window to select context files.

    A hidden root window is created only to own the dialog and is destroyed
    again once the dialog closes, so nothing is left behind for the rest of
    the run.

    Returns:
        List[str]: The file names returned by the dialog, as a list.
    """
    root = tk.Tk()
    try:
        root.withdraw()
        # Keep the dialog above other windows so it is not opened hidden
        root.call("wm", "attributes", ".", "-topmost", True)
        return list(
            fd.askopenfilenames(
                parent=root,
                initialdir=os.getcwd(),
                title="Select files to improve (or give context):",
            )
        )
    finally:
        root.destroy()
+ """ + file_selector = TerminalFileSelector(Path(os.getcwd())) + file_selector.display() + selected_list = file_selector.ask_for_selection() + return selected_list diff --git a/gpt_engineer/main.py b/gpt_engineer/main.py index f90f1699ea..2c001f6887 100644 --- a/gpt_engineer/main.py +++ b/gpt_engineer/main.py @@ -1,14 +1,18 @@ import logging import os +import os + from pathlib import Path import openai import typer + from dotenv import load_dotenv -from gpt_engineer.ai import AI, fallback_model +from gpt_engineer.ai import AI + from gpt_engineer.collect import collect_learnings from gpt_engineer.db import DB, DBs, archive from gpt_engineer.learning import collect_consent @@ -31,13 +35,26 @@ def main( steps_config: StepsConfig = typer.Option( StepsConfig.DEFAULT, "--steps", "-s", help="decide which steps to run" ), + improve_option: bool = typer.Option( + False, + "--improve", + "-i", + help="Improve code from existing project.", + ), verbose: bool = typer.Option(False, "--verbose", "-v"), ): logging.basicConfig(level=logging.DEBUG if verbose else logging.INFO) + + # For the improve option take current project as path and add .gpteng folder + # By now, ignoring the 'project_path' argument + if improve_option: + # The default option for the --improve is the IMPROVE_CODE, not DEFAULT + if steps_config == StepsConfig.DEFAULT: + steps_config = StepsConfig.IMPROVE_CODE + load_env_if_needed() - model = fallback_model(model) ai = AI( model_name=model, temperature=temperature, diff --git a/gpt_engineer/preprompts/implement_on_existing b/gpt_engineer/preprompts/implement_on_existing new file mode 100644 index 0000000000..7f1fafc96e --- /dev/null +++ b/gpt_engineer/preprompts/implement_on_existing @@ -0,0 +1,19 @@ +You are an expert developer tasked with fixing an issue or adding new functionality in an existing codebase. +After reading the implementation request, you will receive the code files one by one. 
+Based on the request, you need to comprehend what needs to be modified by understanding the implementation of the received code and the interconnections between files, functions and classes. +Think step by step and reason yourself to the right decisions to make sure we get it right. +The files are organized in the following way: + +FILENAME is the lowercase file name including the file extension and CODE is the code: + +FILENAME +``` +CODE +``` + +After that, implement the requested functionality by using the received code as a base. +The received code will not necessarily be a standalone implementation; it could be part of a bigger project for which you might not have all the files. Trust that everything you need to know is inside the received files. +Follow the same coding language and framework observed in the received code, using appropriate best practice file naming convention. +Make sure that the code in different files is compatible with each other. +Be sure to implement the requested modification; if you are unsure, write a plausible implementation. +Before you finish, double check that all parts of the architecture are present in the files. diff --git a/gpt_engineer/steps.py b/gpt_engineer/steps.py index 1401e27bba..eb843222fe 100644 --- a/gpt_engineer/steps.py +++ b/gpt_engineer/steps.py @@ -9,8 +9,14 @@ from termcolor import colored from gpt_engineer.ai import AI -from gpt_engineer.chat_to_files import to_files +from gpt_engineer.chat_to_files import ( + format_file_to_input, + get_code_strings, + overwrite_files, + to_files, +) from gpt_engineer.db import DBs +from gpt_engineer.file_selector import ask_for_files from gpt_engineer.learning import human_input Message = Union[AIMessage, HumanMessage, SystemMessage] @@ -22,6 +28,17 @@ def setup_sys_prompt(dbs: DBs) -> str: ) +def setup_sys_prompt_existing_code(dbs: DBs) -> str: + """ + Similar to code generation, but using an existing code base. 
def improve_existing_code(ai: AI, dbs: DBs):
    """
    Ask the user for files and a request, then let the AI modify those files.

    Steps, in order:
      1. ``ask_for_files`` lets the user pick the files (this also maintains
         ``file_list.txt`` in ``dbs.input``).
      2. The improvement request is read from the terminal and stored as
         ``dbs.input["prompt"]``.
      3. The file list and prompt are shown and the user confirms with Enter.
      4. The listed files are read and sent to the AI together with the
         request and the expected output format.
      5. ``overwrite_files`` writes the answer back over the selected files.

    This is the single definition of the step; an earlier copy that omitted
    the output-format instructions was shadowed by this one and never ran.

    Args:
        ai: The AI wrapper used to build and send the messages.
        dbs: The project databases; ``dbs.input`` and ``dbs.preprompts`` are
            read here and ``dbs`` is passed on to ``overwrite_files``.

    Returns:
        The list of messages returned by ``ai.next``.
    """
    file_path_info = ask_for_files(dbs.input)
    dbs.input["prompt"] = input(
        "\nWhat do you need to improve with the selected files?\n"
    )

    confirm_str = f"""
-----------------------------
The following files will be used in the improvement process:
{dbs.input["file_list.txt"]}

The inserted prompt is the following:
'{dbs.input['prompt']}'
-----------------------------

You can change these files in .gpteng folder ({dbs.input.path}) in your project
before proceeding.

Press enter to proceed with modifications.

"""
    input(confirm_str)
    # Read the files only after the confirmation, so that edits the user was
    # just invited to make to file_list.txt are honoured (the prompt below is
    # likewise re-read from dbs.input).
    files_info = get_code_strings(dbs.input)
    messages = [
        ai.fsystem(setup_sys_prompt_existing_code(dbs)),
        ai.fuser(f"Instructions: {dbs.input['prompt']}"),
    ]
    # Add files as input
    for file_name, file_str in files_info.items():
        code_input = format_file_to_input(file_name, file_str)
        messages.append(ai.fuser(f"{code_input}"))

    output_format_str = """
Make sure the output of any files is in the following format where
FILENAME is the file name including the file extension,
LANG is the markup code block language for the code's language, and CODE is the code:

FILENAME
```LANG
CODE
```
"""

    messages = ai.next(messages, output_format_str, step_name=curr_fn())
    # Maybe we should add another step called "replace" or "overwrite"
    overwrite_files(messages[-1].content.strip(), dbs, replace_files=file_path_info)
    return messages
Config.IMPROVE_CODE: [improve_existing_code], } # Future steps that can be added: diff --git a/projects/example/prompt b/projects/example/prompt index 100c878429..7f4d799c3e 100644 --- a/projects/example/prompt +++ b/projects/example/prompt @@ -1 +1 @@ -We are writing snake in python. MVC components split in separate files. Keyboard control. +We are writing snake in python. MVC components split in separate files. Keyboard control. \ No newline at end of file