From f2300208d5e2a5076cbbb4c2aad71096fd040ef9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pawe=C5=82=20Srokosz?= Date: Thu, 12 Oct 2023 18:50:04 +0200 Subject: [PATCH] Fix: slow multipart parsing for huge files with few CR/LF characters --- src/werkzeug/sansio/multipart.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/werkzeug/sansio/multipart.py b/src/werkzeug/sansio/multipart.py index 380993af7..fc8735378 100644 --- a/src/werkzeug/sansio/multipart.py +++ b/src/werkzeug/sansio/multipart.py @@ -251,12 +251,20 @@ def _parse_data(self, data: bytes, *, start: bool) -> tuple[bytes, int, bool]: else: data_start = 0 - if self.buffer.find(b"--" + self.boundary) == -1: + boundary = b"--" + self.boundary + + if self.buffer.find(boundary) == -1: # No complete boundary in the buffer, but there may be # a partial boundary at the end. As the boundary # starts with either a nl or cr find the earliest and # return up to that as data. data_end = del_index = self.last_newline(data[data_start:]) + data_start + # If amount of data after last newline is far from + # possible length of partial boundary, we should + # assume that there is no partial boundary in the buffer + # and return all pending data. + if (len(data) - data_end) > len(b"\n" + boundary): + data_end = del_index = len(data) more_data = True else: match = self.boundary_re.search(data)