Skip to content

Commit

Permalink
ROB: Fixing infinite loop in ArrayObject read_from_stream (#2928)
Browse files Browse the repository at this point in the history
Fixing infinite loop in ArrayObject read_from_stream if stream ends unexpectedly.
  • Loading branch information
jakep-allenai authored Oct 30, 2024
1 parent 9f647e6 commit 9e0fce7
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 0 deletions.
2 changes: 2 additions & 0 deletions pypdf/generic/_data_structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,8 @@ def read_from_stream(
tok = stream.read(1)
while tok.isspace():
tok = stream.read(1)
if tok == b"":
break
if tok == b"%":
stream.seek(-1, 1)
skip_over_comment(stream)
Expand Down
12 changes: 12 additions & 0 deletions tests/test_text_extraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,3 +260,15 @@ def test_layout_mode_space_vertically_font_height_weight():
"PDF extracted text differs from expected value.\n\n"
"Expected:\n\n%r\n\nExtracted:\n\n%r\n\n" % (pdftext, text)
)


@pytest.mark.enable_socket
def test_infinite_loop_arrays():
    """
    Regression test for #2928.

    Downloads a sample PDF (``arrayabruptending.pdf``), opens it with
    ``PdfReader`` and extracts the text of the first page. The test passes
    only if extraction finishes and the known phrase is present in the
    extracted text.
    """
    pdf_bytes = get_data_from_url(
        "https://github.com/user-attachments/files/17576546/arrayabruptending.pdf",
        name="arrayabruptending.pdf",
    )
    # Only the first page is needed to exercise the parsing path under test.
    first_page = PdfReader(BytesIO(pdf_bytes)).pages[0]
    assert "RNA structure comparison" in first_page.extract_text()

0 comments on commit 9e0fce7

Please sign in to comment.