Merge pull request #979 from gpt-engineer-org/ao/langchain-warnings
Remove langchain warnings + file selector improvements
ATheorell authored Jan 19, 2024
2 parents 09438f5 + 878e9df commit 60a0e7a
Showing 9 changed files with 98 additions and 93 deletions.
29 changes: 16 additions & 13 deletions gpt_engineer/applications/cli/cli_agent.py
@@ -7,7 +7,7 @@
from gpt_engineer.core.base_memory import BaseMemory
from gpt_engineer.core.default.disk_execution_env import DiskExecutionEnv
from gpt_engineer.core.default.disk_memory import DiskMemory
from gpt_engineer.core.default.paths import ENTRYPOINT_FILE, PREPROMPTS_PATH
from gpt_engineer.core.default.paths import PREPROMPTS_PATH
from gpt_engineer.core.default.steps import (
execute_entrypoint,
gen_code,
@@ -132,16 +132,19 @@ def improve(
files_dict = self.improve_fn(
self.ai, prompt, files_dict, self.memory, self.preprompts_holder
)
if not execution_command and ENTRYPOINT_FILE not in files_dict:
entrypoint = gen_entrypoint(
self.ai, files_dict, self.memory, self.preprompts_holder
)
combined_dict = {**files_dict, **entrypoint}
files_dict = FilesDict(combined_dict)
files_dict = self.process_code_fn(
self.ai,
self.execution_env,
files_dict,
preprompts_holder=self.preprompts_holder,
)

# No need to run entrypoint for improve right?
# if not execution_command and ENTRYPOINT_FILE not in files_dict:
# entrypoint = gen_entrypoint(
# self.ai, files_dict, self.memory, self.preprompts_holder
# )
# combined_dict = {**files_dict, **entrypoint}
# files_dict = FilesDict(combined_dict)

# files_dict = self.process_code_fn(
# self.ai,
# self.execution_env,
# files_dict,
# preprompts_holder=self.preprompts_holder,
# )
return files_dict
106 changes: 58 additions & 48 deletions gpt_engineer/applications/cli/file_selector.py
@@ -32,17 +32,16 @@


class FileSelector:
def __init__(self, project_path: Union[str, Path]):
self.IGNORE_FOLDERS = {"site-packages", "node_modules", "venv", ".gpteng"}
self.FILE_LIST_NAME = "file_selection.toml"
self.COMMENT = (
"# Change 'selected' from false to true to include files in the edit. "
"GPT-engineer can only read and edit the files that set to true. "
"Including irrelevant files will degrade coding performance, "
"cost additional tokens and potentially lead to violations "
"of the token limit, resulting in runtime errors.\n\n"
)
IGNORE_FOLDERS = {"site-packages", "node_modules", "venv", "__pycache__"}
FILE_LIST_NAME = "file_selection.toml"
COMMENT = (
"# Remove '#' to select a file.\n\n"
"# gpt-engineer can only read selected files. "
"Including irrelevant files will degrade performance, "
"cost additional tokens and potentially overflow token limit.\n\n"
)

def __init__(self, project_path: Union[str, Path]):
self.project_path = project_path
self.metadata_db = DiskMemory(metadata_path(self.project_path))
self.toml_path = self.metadata_db.path / self.FILE_LIST_NAME
@@ -77,52 +76,62 @@ def ask_for_files(self) -> FilesDict:
print(f"Warning: File not found {file_path}")
return FilesDict(content_dict)

def editor_file_selector(self, input_path: str, init: bool = True) -> List[str]:
def editor_file_selector(
self, input_path: str | Path, init: bool = True
) -> List[str]:
"""
Provides an interactive file selection interface by generating a tree representation in a .toml file.
Allows users to select or deselect files for the context improvement process.
"""
root_path = Path(input_path)
tree_dict = {
"files": {}
} # Initialize the dictionary to hold file selection state
tree_dict = {}
toml_file = DiskMemory(metadata_path(input_path)).path / "file_selection.toml"
# Define the toml file path

# Initialize .toml file with file tree if in initial state
if init:
for path in DisplayablePath.make_tree(
root_path
): # Create a tree structure from the root path
if path.path.is_dir() or not self.is_utf8(path.path):
continue
relative_path = os.path.relpath(
path.path, input_path
) # Get the relative path of the file
tree_dict["files"][relative_path] = {
"selected": False
} # Initialize file selection as False
tree_dict = {x: "selected" for x in self.get_current_files(root_path)}

# Write instructions and file selection states to .toml file
s = toml.dumps({"files": tree_dict})

# add comments on all lines that match = "selected"
s = "\n".join(
[
"# " + line if line.endswith(' = "selected"') else line
for line in s.split("\n")
]
)
# Write to the toml file
with open(toml_file, "w") as f:
f.write(self.COMMENT)
toml.dump(tree_dict, f)
f.write(s)

else:
# Load existing files from the .toml configuration
all_files = self.get_current_files(root_path)
s = toml.dumps({"files": {x: "selected" for x in all_files}})

with open(toml_file, "r") as file:
existing_files = toml.load(file)
merged_files = self.merge_file_lists(
existing_files["files"], self.get_current_files(root_path)
)
selected_files = toml.load(file)

lines = s.split("\n")
s = "\n".join(
lines[:1]
+ [
line
if line.split(" = ")[0].strip('"') in selected_files["files"]
else "# " + line
for line in lines[1:]
]
)

# Write the merged list back to the .toml for user review and modification
with open(toml_file, "w") as file:
file.write(self.COMMENT) # Ensure to write the comment
toml.dump({"files": merged_files}, file)
file.write(s)

print(
"Please select(true) and deselect(false) files, save it, and close it to continue..."
"Please select and deselect (add # in front) files, save it, and close it to continue..."
)
self.open_with_default_editor(
toml_file
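The new selection flow serializes every candidate file as "<path>" = "selected" and then comments each entry out, so the user opts files in by deleting the leading "#". A minimal sketch of that transform, assuming the same toml package used above (file names are illustrative):

```python
import toml

# Candidate files, keyed the way get_current_files returns them
tree_dict = {"src/app.py": "selected", "README.md": "selected"}
s = toml.dumps({"files": tree_dict})

# Comment out every entry so that nothing is selected by default
s = "\n".join(
    "# " + line if line.endswith(' = "selected"') else line
    for line in s.split("\n")
)
print(s)
# Output:
# [files]
# # "src/app.py" = "selected"
# # "README.md" = "selected"
```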
@@ -138,6 +147,7 @@ def open_with_default_editor(self, file_path):
editors = [
"gedit",
"notepad",
"nvim",
"write",
"nano",
"vim",
@@ -182,9 +192,8 @@ def get_files_from_toml(self, input_path, toml_file):
edited_tree = toml.load(toml_file) # Load the edited .toml file

# Iterate through the files in the .toml and append selected files to the list
for file, properties in edited_tree["files"].items():
if properties.get("selected", False): # Check if the file is selected
selected_files.append(file)
for file, _ in edited_tree["files"].items():
selected_files.append(file)

# Ensure that at least one file is selected, or raise an exception
if not selected_files:
@@ -212,7 +221,7 @@ def get_files_from_toml(self, input_path, toml_file):
return selected_files
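Because a deselected entry is just a TOML comment, it never parses at all; everything that survives toml.load is selected, which is why the loop above no longer checks a "selected" flag. A small sketch of that behavior:

```python
import toml

edited = toml.loads(
    '[files]\n'
    '"src/app.py" = "selected"\n'
    '# "README.md" = "selected"\n'  # left commented out by the user
)
print(list(edited["files"]))  # ['src/app.py']
```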

def merge_file_lists(
self, existing_files: Dict[str, Any], new_files: Dict[str, Any]
self, existing_files: list[str], new_files: list[str]
) -> Dict[str, Any]:
"""
Merges the new files list with the existing one, preserving the selection status.
@@ -225,27 +234,28 @@

return existing_files

def get_current_files(self, project_path: Union[str, Path]) -> Dict[str, Any]:
def get_current_files(self, project_path: Union[str, Path]) -> list[str]:
"""
Generates a dictionary of all files in the project directory
with their selection status set to False by default.
"""
all_files = {}
all_files = []
project_path = Path(
project_path
).resolve() # Ensure path is absolute and resolved

for path in project_path.glob("**/*"): # Recursively list all files
if path.is_file():
# Normalize and compare each part of the path
if not any(
part in self.IGNORE_FOLDERS
for part in path.relative_to(project_path).parts
) and not path.name.startswith("."):
relative_path = str(
path.relative_to(project_path)
) # Store relative paths
all_files[relative_path] = {"selected": False}
relpath = path.relative_to(project_path)

parts = relpath.parts
if any(part.startswith(".") for part in parts):
continue # Skip hidden files
if any(part in self.IGNORE_FOLDERS for part in parts):
continue

all_files.append(str(relpath))

return all_files
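A self-contained sketch of the same filtering, showing which paths survive (the directory layout is invented for illustration):

```python
import tempfile
from pathlib import Path

IGNORE_FOLDERS = {"site-packages", "node_modules", "venv", "__pycache__"}

root = Path(tempfile.mkdtemp())
for name in ["src/app.py", ".git/config", "venv/lib/x.py"]:
    f = root / name
    f.parent.mkdir(parents=True, exist_ok=True)
    f.touch()

kept = []
for path in root.glob("**/*"):  # recursively list all files
    if not path.is_file():
        continue
    parts = path.relative_to(root).parts
    if any(part.startswith(".") for part in parts):
        continue  # hidden files and directories
    if any(part in IGNORE_FOLDERS for part in parts):
        continue  # ignored folders anywhere in the path
    kept.append(str(path.relative_to(root)))

print(kept)  # ['src/app.py']
```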

def is_in_ignoring_extensions(self, path: Path) -> bool:
19 changes: 7 additions & 12 deletions gpt_engineer/applications/cli/main.py
@@ -1,14 +1,12 @@
"""
This module provides a CLI tool to interact with the GPT Engineer application,
enabling users to use OpenAI's models and define various parameters for the
project they want to generate, improve or interact with.
Entrypoint for the CLI tool.
Main Functionality:
---------------------
- Load environment variables needed to work with OpenAI.
- Allow users to specify parameters such as:
- Project path
- Model type (default to GPT-4)
- LLM
- Temperature
- Step configurations
- Code improvement mode
@@ -153,12 +151,10 @@ def main(
azure_endpoint=azure_endpoint,
)

# project_path = os.path.abspath(
# project_path
# ) # resolve the string to a valid path (eg "a/b/../c" to "a/c")
path = Path(project_path) # .absolute()
path = Path(project_path)
print("Running gpt-engineer in", path.absolute(), "\n")
prompt = load_prompt(DiskMemory(path), improve_mode)

# configure generation function
if clarify_mode:
code_gen_fn = clarified_gen
@@ -195,13 +191,12 @@ def main(
files_dict = agent.improve(files_dict, prompt)
else:
files_dict = agent.init(prompt)
# collect user feedback if user consents
config = (code_gen_fn.__name__, execution_fn.__name__)
collect_and_send_human_review(prompt, model, temperature, config, agent.memory)

store.upload(files_dict)

# collect user feedback if user consents
config = (code_gen_fn.__name__, execution_fn.__name__)
collect_and_send_human_review(prompt, model, temperature, config, agent.memory)

print("Total api cost: $ ", ai.token_usage_log.usage_cost())


14 changes: 7 additions & 7 deletions gpt_engineer/core/ai.py
@@ -10,7 +10,6 @@
import openai

from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chat_models import AzureChatOpenAI, ChatOpenAI
from langchain.chat_models.base import BaseChatModel
from langchain.schema import (
AIMessage,
@@ -19,6 +18,7 @@
messages_from_dict,
messages_to_dict,
)
from langchain_community.chat_models import AzureChatOpenAI, ChatOpenAI

from gpt_engineer.core.token_usage import TokenUsageLog

@@ -115,8 +115,7 @@ def next(

logger.debug(f"Creating a new chat completion: {messages}")

callbacks = [StreamingStdOutCallbackHandler()]
response = self.backoff_inference(messages, callbacks)
response = self.backoff_inference(messages)

self.token_usage_log.update_log(
messages=messages, answer=response.content, step_name=step_name
@@ -129,7 +128,7 @@ def next(
@backoff.on_exception(
backoff.expo, openai.error.RateLimitError, max_tries=7, max_time=45
)
def backoff_inference(self, messages, callbacks):
def backoff_inference(self, messages):
"""
Perform inference using the language model while implementing an exponential backoff strategy.
@@ -160,10 +159,9 @@ def backoff_inference(self, messages, callbacks):
Example
-------
>>> messages = [SystemMessage(content="Hello"), HumanMessage(content="How's the weather?")]
>>> callbacks = [some_logging_callback]
>>> response = backoff_inference(messages, callbacks)
>>> response = backoff_inference(messages)
"""
return self.llm(messages, callbacks=callbacks) # type: ignore
return self.llm.invoke(messages) # type: ignore

@staticmethod
def serialize_messages(messages: List[Message]) -> str:
@@ -229,13 +227,15 @@ def _create_chat_model(self) -> BaseChatModel:
deployment_name=self.model_name,
openai_api_type="azure",
streaming=self.streaming,
callbacks=[StreamingStdOutCallbackHandler()],
)

return ChatOpenAI(
model=self.model_name,
temperature=self.temperature,
streaming=self.streaming,
client=openai.ChatCompletion,
callbacks=[StreamingStdOutCallbackHandler()],
)


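Taken together, the changes in this file move to the post-split LangChain layout: chat models are imported from langchain_community, streaming callbacks are attached when the model is constructed, and inference goes through .invoke instead of calling the model directly. A minimal sketch of the new pattern, assuming langchain-community is installed and OPENAI_API_KEY is set:

```python
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.schema import HumanMessage, SystemMessage
from langchain_community.chat_models import ChatOpenAI

llm = ChatOpenAI(
    model="gpt-4",
    temperature=0.1,
    streaming=True,
    callbacks=[StreamingStdOutCallbackHandler()],  # callbacks now live on the model
)
# .invoke replaces the deprecated llm(messages, callbacks=...) call style
response = llm.invoke(
    [SystemMessage(content="You are terse."), HumanMessage(content="Say hello.")]
)
print(response.content)
```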
2 changes: 2 additions & 0 deletions gpt_engineer/core/default/steps.py
@@ -160,10 +160,12 @@ def improve(
messages = [
SystemMessage(content=setup_sys_prompt_existing_code(preprompts)),
]

# Add files as input
messages.append(HumanMessage(content=f"{files_dict.to_chat()}"))
messages.append(HumanMessage(content=f"Request: {prompt}"))
problems = [""]

# check edit correctness
edit_refinements = 0
while len(problems) > 0 and edit_refinements <= MAX_EDIT_REFINEMENT_STEPS:
11 changes: 5 additions & 6 deletions gpt_engineer/preprompts/improve
@@ -1,8 +1,7 @@
Act as an expert software developer.
Always use best practices when coding.
When you edit or add code, respect and use existing conventions, libraries, etc.
You are an expert software developer.
Respect existing conventions, libraries, etc.

Take requests for changes to the supplied code, and then you MUST
You MUST
1. (planning) Think step-by-step and explain the needed changes. Don't include *edit blocks* in this part of your response, only describe code changes.
2. (output) Describe each change with an *edit block* per the example below.

@@ -38,7 +37,7 @@ example_1.py
```

```python
example_1.py
example_2.py
<<<<<<< HEAD
def add_one(a,b):
a = a+2
@@ -49,7 +48,7 @@ example_1.py
```

```python
example_1.py
example_2.py
<<<<<<< HEAD
class DBS:
db = 'aaa'
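The example blocks above are truncated by the diff view. For reference, a complete *edit block* in this format pairs the old code with its replacement using git-style conflict markers; the ======= divider, the replacement body, and the closing >>>>>>> updated line below are reconstructed from the fragments, not shown in the diff:

```python
example_2.py
<<<<<<< HEAD
def add_one(a,b):
    a = a+2
=======
def add_one(a,b):
    a = a+1
>>>>>>> updated
```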
2 changes: 1 addition & 1 deletion tests/ai_cache.json

Large diffs are not rendered by default.

6 changes: 1 addition & 5 deletions tests/applications/cli/test_main.py
@@ -86,11 +86,7 @@ def improve_generator():
)
os.environ["GPTE_TEST_MODE"] = "True"
simplified_main(str(p), "improve")
ex_env = DiskExecutionEnv(path=p)
ex_env.run(f"bash {ENTRYPOINT_FILE}")
assert (p / "output.txt").exists()
text = (p / "output.txt").read_text().strip()
assert text == "hello"
DiskExecutionEnv(path=p)
del os.environ["GPTE_TEST_MODE"]

# Runs gpt-engineer with lite mode and generates a project with only the main prompt.
2 changes: 1 addition & 1 deletion tests/core/test_ai.py
@@ -1,5 +1,5 @@
from langchain.chat_models.base import BaseChatModel
from langchain.chat_models.fake import FakeListChatModel
from langchain_community.chat_models.fake import FakeListChatModel

from gpt_engineer.core.ai import AI

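FakeListChatModel replays canned responses, which keeps these tests offline. A hedged sketch of how it can stand in for a real chat model (the responses argument is the community package's API; the prompt and reply are invented):

```python
from langchain_community.chat_models.fake import FakeListChatModel

llm = FakeListChatModel(responses=["print('hello')"])
print(llm.invoke("write hello world").content)  # -> print('hello')
```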
