From 81c888f2f5cbf36356c7cf94073cfa050bb10eba Mon Sep 17 00:00:00 2001 From: Sebastian Krause Date: Fri, 17 Feb 2017 11:26:45 +0100 Subject: [PATCH 1/2] Improved performance and security for ContentStream_readInlineImage. --- PyPDF2/pdf.py | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/PyPDF2/pdf.py b/PyPDF2/pdf.py index 7b7f08bf3..53ebf4acd 100644 --- a/PyPDF2/pdf.py +++ b/PyPDF2/pdf.py @@ -2840,11 +2840,27 @@ def _readInlineImage(self, stream): # left at beginning of ID tmp = stream.read(3) assert tmp[:2] == b_("ID") - data = b_("") + data = BytesIO() + # Read the inline image, while checking for EI (End Image) operator. while True: - # Read the inline image, while checking for EI (End Image) operator. - tok = stream.read(1) - if tok == b_("E"): + # Read 8 kB at a time and check if the chunk contains the E operator. + buf = stream.read(8192) + + # We have reached the end of the stream, but haven't found the EI operator. + if not buf: + raise utils.PdfReadError("Unexpected end of stream") + + loc = buf.find(b_("E")) + + if loc == -1: + data.write(buf) + else: + # Write out everything before the E. + data.write(buf[0:loc]) + + # Seek back in the stream to read the E next. + stream.seek(loc - len(buf), 1) + tok = stream.read(1) # Check for End Image tok2 = stream.read(1) if tok2 == b_("I"): @@ -2861,14 +2877,12 @@ def _readInlineImage(self, stream): stream.seek(-1, 1) break else: - stream.seek(-1,1) - data += info + stream.seek(-1, 1) + data.write(info) else: stream.seek(-1, 1) - data += tok - else: - data += tok - return {"settings": settings, "data": data} + data.write(tok) + return {"settings": settings, "data": data.getvalue()} def _getData(self): newdata = BytesIO() From 894c61807a66d185f6a73ef5ffbbd0ba56cf66a4 Mon Sep 17 00:00:00 2001 From: Martin Thoma Date: Fri, 15 Apr 2022 13:17:14 +0200 Subject: [PATCH 2/2] Minor style change --- PyPDF2/pdf.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/PyPDF2/pdf.py b/PyPDF2/pdf.py index c3845d392..6d1824384 100644 --- a/PyPDF2/pdf.py +++ b/PyPDF2/pdf.py @@ -2822,11 +2822,9 @@ def _readInlineImage(self, stream): while True: # Read 8 kB at a time and check if the chunk contains the E operator. buf = stream.read(8192) - # We have reached the end of the stream, but haven't found the EI operator. if not buf: raise utils.PdfReadError("Unexpected end of stream") - loc = buf.find(b_("E")) if loc == -1: