diff --git a/PyPDF2/_cmap.py b/PyPDF2/_cmap.py
index 2a9e2f0d2..05dea9bfb 100644
--- a/PyPDF2/_cmap.py
+++ b/PyPDF2/_cmap.py
@@ -205,6 +205,9 @@ def parse_to_unicode(
 
 def prepare_cm(ft: DictionaryObject) -> bytes:
     cm: bytes = cast(DecodedStreamObject, ft["/ToUnicode"]).get_data()
+    # get_data() is annotated as bytes but may hand back str; normalize so the rest of the function works on bytes
+    if isinstance(cm, str):
+        cm = cm.encode()
     # we need to prepare cm before due to missing return line in pdf printed to pdf from word
     cm = (
         cm.strip()
diff --git a/tests/test_cmap.py b/tests/test_cmap.py
index df472da6c..fd63fdd0e 100644
--- a/tests/test_cmap.py
+++ b/tests/test_cmap.py
@@ -84,3 +84,11 @@ def test_iss1370():
     name = "cmap1370.pdf"
     reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
     reader.pages[0].extract_text()
+
+
+def test_iss1379():
+    """Text extraction from page 3 of 02voc.pdf must not raise (str-valued /ToUnicode data)."""
+    url = "https://github.com/py-pdf/PyPDF2/files/9712729/02voc.pdf"
+    name = "02voc.pdf"
+    reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))
+    reader.pages[2].extract_text()