Merge pull request #979 from gpt-engineer-org/ao/langchain-warnings
Remove langchain warnings + file selector improvements
ATheorell authored Jan 19, 2024
2 parents 09438f5 + 878e9df commit 60a0e7a
Showing 9 changed files with 98 additions and 93 deletions.
29 changes: 16 additions & 13 deletions gpt_engineer/applications/cli/cli_agent.py
@@ -7,7 +7,7 @@
from gpt_engineer.core.base_memory import BaseMemory
from gpt_engineer.core.default.disk_execution_env import DiskExecutionEnv
from gpt_engineer.core.default.disk_memory import DiskMemory
from gpt_engineer.core.default.paths import ENTRYPOINT_FILE, PREPROMPTS_PATH
from gpt_engineer.core.default.paths import PREPROMPTS_PATH
from gpt_engineer.core.default.steps import (
execute_entrypoint,
gen_code,
@@ -132,16 +132,19 @@ def improve(
files_dict = self.improve_fn(
self.ai, prompt, files_dict, self.memory, self.preprompts_holder
)
if not execution_command and ENTRYPOINT_FILE not in files_dict:
entrypoint = gen_entrypoint(
self.ai, files_dict, self.memory, self.preprompts_holder
)
combined_dict = {**files_dict, **entrypoint}
files_dict = FilesDict(combined_dict)
files_dict = self.process_code_fn(
self.ai,
self.execution_env,
files_dict,
preprompts_holder=self.preprompts_holder,
)

# No need to run entrypoint for improve right?
# if not execution_command and ENTRYPOINT_FILE not in files_dict:
# entrypoint = gen_entrypoint(
# self.ai, files_dict, self.memory, self.preprompts_holder
# )
# combined_dict = {**files_dict, **entrypoint}
# files_dict = FilesDict(combined_dict)

# files_dict = self.process_code_fn(
# self.ai,
# self.execution_env,
# files_dict,
# preprompts_holder=self.preprompts_holder,
# )
return files_dict
106 changes: 58 additions & 48 deletions gpt_engineer/applications/cli/file_selector.py
@@ -32,17 +32,16 @@


class FileSelector:
def __init__(self, project_path: Union[str, Path]):
self.IGNORE_FOLDERS = {"site-packages", "node_modules", "venv", ".gpteng"}
self.FILE_LIST_NAME = "file_selection.toml"
self.COMMENT = (
"# Change 'selected' from false to true to include files in the edit. "
"GPT-engineer can only read and edit the files that set to true. "
"Including irrelevant files will degrade coding performance, "
"cost additional tokens and potentially lead to violations "
"of the token limit, resulting in runtime errors.\n\n"
)
IGNORE_FOLDERS = {"site-packages", "node_modules", "venv", "__pycache__"}
FILE_LIST_NAME = "file_selection.toml"
COMMENT = (
"# Remove '#' to select a file.\n\n"
"# gpt-engineer can only read selected files. "
"Including irrelevant files will degrade performance, "
"cost additional tokens and potentially overflow token limit.\n\n"
)

def __init__(self, project_path: Union[str, Path]):
self.project_path = project_path
self.metadata_db = DiskMemory(metadata_path(self.project_path))
self.toml_path = self.metadata_db.path / self.FILE_LIST_NAME
@@ -77,52 +76,62 @@ def ask_for_files(self) -> FilesDict:
print(f"Warning: File not found {file_path}")
return FilesDict(content_dict)

def editor_file_selector(self, input_path: str, init: bool = True) -> List[str]:
def editor_file_selector(
self, input_path: str | Path, init: bool = True
) -> List[str]:
"""
Provides an interactive file selection interface by generating a tree representation in a .toml file.
Allows users to select or deselect files for the context improvement process.
"""
root_path = Path(input_path)
tree_dict = {
"files": {}
} # Initialize the dictionary to hold file selection state
tree_dict = {}
toml_file = DiskMemory(metadata_path(input_path)).path / "file_selection.toml"
# Define the toml file path

# Initialize .toml file with file tree if in initial state
if init:
for path in DisplayablePath.make_tree(
root_path
): # Create a tree structure from the root path
if path.path.is_dir() or not self.is_utf8(path.path):
continue
relative_path = os.path.relpath(
path.path, input_path
) # Get the relative path of the file
tree_dict["files"][relative_path] = {
"selected": False
} # Initialize file selection as False
tree_dict = {x: "selected" for x in self.get_current_files(root_path)}

# Write instructions and file selection states to .toml file
s = toml.dumps({"files": tree_dict})

# add comments on all lines that match = "selected"
s = "\n".join(
[
"# " + line if line.endswith(' = "selected"') else line
for line in s.split("\n")
]
)
# Write to the toml file
with open(toml_file, "w") as f:
f.write(self.COMMENT)
toml.dump(tree_dict, f)
f.write(s)

else:
# Load existing files from the .toml configuration
all_files = self.get_current_files(root_path)
s = toml.dumps({"files": {x: "selected" for x in all_files}})

with open(toml_file, "r") as file:
existing_files = toml.load(file)
merged_files = self.merge_file_lists(
existing_files["files"], self.get_current_files(root_path)
)
selected_files = toml.load(file)

lines = s.split("\n")
s = "\n".join(
lines[:1]
+ [
line
if line.split(" = ")[0].strip('"') in selected_files["files"]
else "# " + line
for line in lines[1:]
]
)

# Write the merged list back to the .toml for user review and modification
with open(toml_file, "w") as file:
file.write(self.COMMENT) # Ensure to write the comment
toml.dump({"files": merged_files}, file)
file.write(s)

print(
"Please select(true) and deselect(false) files, save it, and close it to continue..."
"Please select and deselect (add # in front) files, save it, and close it to continue..."
)
self.open_with_default_editor(
toml_file
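The new selection flow serializes every candidate file as "<path>" = "selected" and then comments each entry out, so the user opts files in by deleting the leading "#". A minimal sketch of that transform, assuming the same toml package used above (file names are illustrative):

```python
import toml

# Candidate files, keyed the way get_current_files returns them
tree_dict = {"src/app.py": "selected", "README.md": "selected"}
s = toml.dumps({"files": tree_dict})

# Comment out every entry so that nothing is selected by default
s = "\n".join(
    "# " + line if line.endswith(' = "selected"') else line
    for line in s.split("\n")
)
print(s)
# Output:
# [files]
# # "src/app.py" = "selected"
# # "README.md" = "selected"
```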
@@ -138,6 +147,7 @@ def open_with_default_editor(self, file_path):
editors = [
"gedit",
"notepad",
"nvim",
"write",
"nano",
"vim",
@@ -182,9 +192,8 @@ def get_files_from_toml(self, input_path, toml_file):
edited_tree = toml.load(toml_file) # Load the edited .toml file

# Iterate through the files in the .toml and append selected files to the list
for file, properties in edited_tree["files"].items():
if properties.get("selected", False): # Check if the file is selected
selected_files.append(file)
for file, _ in edited_tree["files"].items():
selected_files.append(file)

# Ensure that at least one file is selected, or raise an exception
if not selected_files:
@@ -212,7 +221,7 @@ def get_files_from_toml(self, input_path, toml_file):
return selected_files
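Because a deselected entry is just a TOML comment, it never parses at all; everything that survives toml.load is selected, which is why the loop above no longer checks a "selected" flag. A small sketch of that behavior:

```python
import toml

edited = toml.loads(
    '[files]\n'
    '"src/app.py" = "selected"\n'
    '# "README.md" = "selected"\n'  # left commented out by the user
)
print(list(edited["files"]))  # ['src/app.py']
```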

def merge_file_lists(
self, existing_files: Dict[str, Any], new_files: Dict[str, Any]
self, existing_files: list[str], new_files: list[str]
) -> Dict[str, Any]:
"""
Merges the new files list with the existing one, preserving the selection status.
@@ -225,27 +234,28 @@

return existing_files

def get_current_files(self, project_path: Union[str, Path]) -> Dict[str, Any]:
def get_current_files(self, project_path: Union[str, Path]) -> list[str]:
"""
Generates a dictionary of all files in the project directory
with their selection status set to False by default.
"""
all_files = {}
all_files = []
project_path = Path(
project_path
).resolve() # Ensure path is absolute and resolved

for path in project_path.glob("**/*"): # Recursively list all files
if path.is_file():
# Normalize and compare each part of the path
if not any(
part in self.IGNORE_FOLDERS
for part in path.relative_to(project_path).parts
) and not path.name.startswith("."):
relative_path = str(
path.relative_to(project_path)
) # Store relative paths
all_files[relative_path] = {"selected": False}
relpath = path.relative_to(project_path)

parts = relpath.parts
if any(part.startswith(".") for part in parts):
continue # Skip hidden files
if any(part in self.IGNORE_FOLDERS for part in parts):
continue

all_files.append(str(relpath))

return all_files
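A self-contained sketch of the same filtering, showing which paths survive (the directory layout is invented for illustration):

```python
import tempfile
from pathlib import Path

IGNORE_FOLDERS = {"site-packages", "node_modules", "venv", "__pycache__"}

root = Path(tempfile.mkdtemp())
for name in ["src/app.py", ".git/config", "venv/lib/x.py"]:
    f = root / name
    f.parent.mkdir(parents=True, exist_ok=True)
    f.touch()

kept = []
for path in root.glob("**/*"):  # recursively list all files
    if not path.is_file():
        continue
    parts = path.relative_to(root).parts
    if any(part.startswith(".") for part in parts):
        continue  # hidden files and directories
    if any(part in IGNORE_FOLDERS for part in parts):
        continue  # ignored folders anywhere in the path
    kept.append(str(path.relative_to(root)))

print(kept)  # ['src/app.py']
```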

def is_in_ignoring_extensions(self, path: Path) -> bool:
19 changes: 7 additions & 12 deletions gpt_engineer/applications/cli/main.py
@@ -1,14 +1,12 @@
"""
This module provides a CLI tool to interact with the GPT Engineer application,
enabling users to use OpenAI's models and define various parameters for the
project they want to generate, improve or interact with.
Entrypoint for the CLI tool.
Main Functionality:
---------------------
- Load environment variables needed to work with OpenAI.
- Allow users to specify parameters such as:
- Project path
- Model type (default to GPT-4)
- LLM
- Temperature
- Step configurations
- Code improvement mode
@@ -153,12 +151,10 @@ def main(
azure_endpoint=azure_endpoint,
)

# project_path = os.path.abspath(
# project_path
# ) # resolve the string to a valid path (eg "a/b/../c" to "a/c")
path = Path(project_path) # .absolute()
path = Path(project_path)
print("Running gpt-engineer in", path.absolute(), "\n")
prompt = load_prompt(DiskMemory(path), improve_mode)

# configure generation function
if clarify_mode:
code_gen_fn = clarified_gen
@@ -195,13 +191,12 @@ def main(
files_dict = agent.improve(files_dict, prompt)
else:
files_dict = agent.init(prompt)
# collect user feedback if user consents
config = (code_gen_fn.__name__, execution_fn.__name__)
collect_and_send_human_review(prompt, model, temperature, config, agent.memory)

store.upload(files_dict)

# collect user feedback if user consents
config = (code_gen_fn.__name__, execution_fn.__name__)
collect_and_send_human_review(prompt, model, temperature, config, agent.memory)

print("Total api cost: $ ", ai.token_usage_log.usage_cost())


14 changes: 7 additions & 7 deletions gpt_engineer/core/ai.py
@@ -10,7 +10,6 @@
import openai

from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chat_models import AzureChatOpenAI, ChatOpenAI
from langchain.chat_models.base import BaseChatModel
from langchain.schema import (
AIMessage,
@@ -19,6 +18,7 @@
messages_from_dict,
messages_to_dict,
)
from langchain_community.chat_models import AzureChatOpenAI, ChatOpenAI

from gpt_engineer.core.token_usage import TokenUsageLog

@@ -115,8 +115,7 @@ def next(

logger.debug(f"Creating a new chat completion: {messages}")

callbacks = [StreamingStdOutCallbackHandler()]
response = self.backoff_inference(messages, callbacks)
response = self.backoff_inference(messages)

self.token_usage_log.update_log(
messages=messages, answer=response.content, step_name=step_name
@@ -129,7 +128,7 @@ def next(
@backoff.on_exception(
backoff.expo, openai.error.RateLimitError, max_tries=7, max_time=45
)
def backoff_inference(self, messages, callbacks):
def backoff_inference(self, messages):
"""
Perform inference using the language model while implementing an exponential backoff strategy.
@@ -160,10 +159,9 @@ def backoff_inference(self, messages, callbacks):
Example
-------
>>> messages = [SystemMessage(content="Hello"), HumanMessage(content="How's the weather?")]
>>> callbacks = [some_logging_callback]
>>> response = backoff_inference(messages, callbacks)
>>> response = backoff_inference(messages)
"""
return self.llm(messages, callbacks=callbacks) # type: ignore
return self.llm.invoke(messages) # type: ignore

@staticmethod
def serialize_messages(messages: List[Message]) -> str:
@@ -229,13 +227,15 @@ def _create_chat_model(self) -> BaseChatModel:
deployment_name=self.model_name,
openai_api_type="azure",
streaming=self.streaming,
callbacks=[StreamingStdOutCallbackHandler()],
)

return ChatOpenAI(
model=self.model_name,
temperature=self.temperature,
streaming=self.streaming,
client=openai.ChatCompletion,
callbacks=[StreamingStdOutCallbackHandler()],
)


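Taken together, the changes in this file move to the post-split LangChain layout: chat models are imported from langchain_community, streaming callbacks are attached when the model is constructed, and inference goes through .invoke instead of calling the model directly. A minimal sketch of the new pattern, assuming langchain-community is installed and OPENAI_API_KEY is set:

```python
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.schema import HumanMessage, SystemMessage
from langchain_community.chat_models import ChatOpenAI

llm = ChatOpenAI(
    model="gpt-4",
    temperature=0.1,
    streaming=True,
    callbacks=[StreamingStdOutCallbackHandler()],  # callbacks now live on the model
)
# .invoke replaces the deprecated llm(messages, callbacks=...) call style
response = llm.invoke(
    [SystemMessage(content="You are terse."), HumanMessage(content="Say hello.")]
)
print(response.content)
```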
2 changes: 2 additions & 0 deletions gpt_engineer/core/default/steps.py
@@ -160,10 +160,12 @@ def improve(
messages = [
SystemMessage(content=setup_sys_prompt_existing_code(preprompts)),
]

# Add files as input
messages.append(HumanMessage(content=f"{files_dict.to_chat()}"))
messages.append(HumanMessage(content=f"Request: {prompt}"))
problems = [""]

# check edit correctness
edit_refinements = 0
while len(problems) > 0 and edit_refinements <= MAX_EDIT_REFINEMENT_STEPS:
11 changes: 5 additions & 6 deletions gpt_engineer/preprompts/improve
@@ -1,8 +1,7 @@
Act as an expert software developer.
Always use best practices when coding.
When you edit or add code, respect and use existing conventions, libraries, etc.
You are an expert software developer.
Respect existing conventions, libraries, etc.

Take requests for changes to the supplied code, and then you MUST
You MUST
1. (planning) Think step-by-step and explain the needed changes. Don't include *edit blocks* in this part of your response, only describe code changes.
2. (output) Describe each change with an *edit block* per the example below.

@@ -38,7 +37,7 @@ example_1.py
```

```python
example_1.py
example_2.py
<<<<<<< HEAD
def add_one(a,b):
a = a+2
@@ -49,7 +48,7 @@ example_1.py
```

```python
example_1.py
example_2.py
<<<<<<< HEAD
class DBS:
db = 'aaa'
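The example blocks above are truncated by the diff view. For reference, a complete *edit block* in this format pairs the old code with its replacement using git-style conflict markers; the ======= divider, the replacement body, and the closing >>>>>>> updated line below are reconstructed from the fragments, not shown in the diff:

```python
example_2.py
<<<<<<< HEAD
def add_one(a,b):
    a = a+2
=======
def add_one(a,b):
    a = a+1
>>>>>>> updated
```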
2 changes: 1 addition & 1 deletion tests/ai_cache.json

Large diffs are not rendered by default.

6 changes: 1 addition & 5 deletions tests/applications/cli/test_main.py
@@ -86,11 +86,7 @@ def improve_generator():
)
os.environ["GPTE_TEST_MODE"] = "True"
simplified_main(str(p), "improve")
ex_env = DiskExecutionEnv(path=p)
ex_env.run(f"bash {ENTRYPOINT_FILE}")
assert (p / "output.txt").exists()
text = (p / "output.txt").read_text().strip()
assert text == "hello"
DiskExecutionEnv(path=p)
del os.environ["GPTE_TEST_MODE"]

# Runs gpt-engineer with lite mode and generates a project with only the main prompt.
2 changes: 1 addition & 1 deletion tests/core/test_ai.py
@@ -1,5 +1,5 @@
from langchain.chat_models.base import BaseChatModel
from langchain.chat_models.fake import FakeListChatModel
from langchain_community.chat_models.fake import FakeListChatModel

from gpt_engineer.core.ai import AI

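FakeListChatModel replays canned responses, which keeps these tests offline. A hedged sketch of how it can stand in for a real chat model (the responses argument is the community package's API; the prompt and reply are invented):

```python
from langchain_community.chat_models.fake import FakeListChatModel

llm = FakeListChatModel(responses=["print('hello')"])
print(llm.invoke("write hello world").content)  # -> print('hello')
```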
