From a36e4e7e0b5e3699d9827ce38c7843ac7bf4343c Mon Sep 17 00:00:00 2001 From: Reinder Vos de Wael Date: Thu, 19 Dec 2024 11:18:31 -0500 Subject: [PATCH] fix: Fix finding runs when the needle ends on the last character (#38) --- pyproject.toml | 2 +- src/cmi_docx/paragraph.py | 5 ++++- tests/test_paragraph.py | 13 +++++++++++++ 3 files changed, 18 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 1d8c0dc..2f4052e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "cmi-docx" -version = "0.3.6" +version = "0.3.7" description = "Additional tooling for Python-docx." readme = "README.md" requires-python = ">=3.10" diff --git a/src/cmi_docx/paragraph.py b/src/cmi_docx/paragraph.py index 9687467..d5c55b2 100644 --- a/src/cmi_docx/paragraph.py +++ b/src/cmi_docx/paragraph.py @@ -70,10 +70,13 @@ def find_in_runs(self, needle: str) -> list[run.FindRun]: run_finds: list[run.FindRun] = [] run_lengths = [len(run.text) for run in self.paragraph.runs] cumulative_run_lengths = list(itertools.accumulate(run_lengths)) + for occurence in self.find_in_paragraph(needle).character_indices: start_run = bisect.bisect_right(cumulative_run_lengths, occurence[0]) end_run = bisect.bisect_right( - cumulative_run_lengths[:-1], occurence[1], lo=start_run + cumulative_run_lengths[:-1], + occurence[1] - 1, # -1 as the range does not include the last character + lo=start_run, ) start_index = ( diff --git a/tests/test_paragraph.py b/tests/test_paragraph.py index 4bb1fac..98938eb 100644 --- a/tests/test_paragraph.py +++ b/tests/test_paragraph.py @@ -66,6 +66,19 @@ def test_find_in_single_run(sample_paragraph: docx_paragraph.Paragraph) -> None: assert actual[1].character_indices == expected[1].character_indices +def test_find_in_single_run_complete() -> None: + """Tests finding an exact match of a run.""" + document = docx.Document() + para = document.add_paragraph("This is a sample paragraph.") + para.add_run("full-run") + para.add_run("another") + extend_paragraph = paragraph.ExtendParagraph(para) + + actual = extend_paragraph.find_in_runs("full-run") + + assert len(actual[0].runs) == 1 + + def test_replace_single_run(sample_paragraph: docx_paragraph.Paragraph) -> None: """Test replacing text in a paragraph.""" extend_paragraph = paragraph.ExtendParagraph(sample_paragraph)