Revert "Improved chat parsing with no AI logic (AntonOsika#120)"

This reverts commit 8facedd.
70ziko · Oct 25, 2023 · 03ac9c5
1 parent 6ce02cf
commit 03ac9c5
Show file tree

Hide file tree

Showing 2 changed files with 11 additions and 201 deletions.
diff --git a/gpt_engineer/chat_to_files.py b/gpt_engineer/chat_to_files.py
@@ -2,82 +2,21 @@
 from typing import List, Tuple
 from gpt_engineer.db import DB
 
-# Amount of lines within the code block to consider for filename discovery
-N_CODELINES_FOR_FILENAME_TA = 5
 
-# Default path to use if no filename is found
-DEFAULT_PATH = 'unknown.txt'
+def parse_chat(chat) -> List[Tuple[str, str]]:
+    # Get all ``` blocks
+    regex = r"```(.*?)```"
 
+    matches = re.finditer(regex, chat, re.DOTALL)
 
-def parse_chat(chat: str, verbose: bool = False) -> List[Tuple[str, str]]:
-    '''
-    Parses a chat message and returns a list of tuples containing
-    the file path and the code content for each file.
-    '''
-    code_regex = r'```(.*?)```'
-    filename_regex = r'\b[\w-]+\.[\w]{1,6}\b'
-
-    # Get all ``` (code) blocks
-    code_matches = re.finditer(code_regex, chat, re.DOTALL)
-
-    prev_code_y_end = 0
     files = []
-    for match in code_matches:
-        lines = match.group(1).split('\n')
-        code_y_start = match.start()
-        code_y_end = match.end()
-
-        # Now, we need to get the filename associated with this code block.
-        # We will look for the filename somewhere near the code block start.
-        #
-        # This "somewhere near" is referred to as the "filename_ta", to
-        # resemble a sort-of target area (ta).
-        #
-        # The target area includes the text preceding the code block that
-        # does not belong to previous code blocks ("no_code").
-        # Additionally, as sometimes the filename is defined within
-        # the code block itself, we will also include the first few lines
-        # of the code block in the filename_ta.
-        #
-        # Example:
-        # ```python
-        # # File: entrypoint.py
-        # import pygame
-        # ```
-        #
-        # The amount of lines to consider within the code block is set by
-        # the constant 'N_CODELINES_FOR_FILENAME_TA'.
-        #
-        # Get the "preceding" text, which is located between codeblocks
-        no_code = chat[prev_code_y_end:code_y_start].strip()
-        within_code = '\n'.join(lines[:N_CODELINES_FOR_FILENAME_TA])
-        filename_ta = no_code + '\n' + within_code
-
-        # The path is the filename itself which we greedily match
-        filename = re.search(filename_regex, filename_ta)
-        path = filename.group(0) if filename else DEFAULT_PATH
-
-        # Visualize the filename_ta if verbose
-        if verbose:
-            print('-' * 20)
-            print(f'Path: {path}')
-            print('-' * 20)
-            print(filename_ta)
-            print('-' * 20)
-
-        # Check if its not a false positive
-        #
-        # For instance, the match with ```main.py``` should not be considered.
-        # ```main.py```
-        # ```python
-        # ...
-        # ```
-        if not re.fullmatch(filename_regex, '\n'.join(lines)):
-            # Update the previous code block end
-            prev_code_y_end = code_y_end
-
-            # File and code have been matched, add them to the list
-            files.append((path, '\n'.join(lines[1:])))
+    for match in matches:
+        path = match.group(1).split("\n")[0]
+        # Get the code
+        code = match.group(1).split("\n")[1:]
+        code = "\n".join(code)
+        # Add the file to the list
+        files.append((path, code))
 
     return files
 

diff --git a/tests/test_chat_parser.py b/tests/test_chat_parser.py