diff --git a/pdfplumber/page.py b/pdfplumber/page.py index 273f3361..8da69737 100644 --- a/pdfplumber/page.py +++ b/pdfplumber/page.py @@ -383,14 +383,10 @@ def outside_bbox( """ Same as .crop, except only includes objects fully within the bbox """ - p = CroppedPage( + return CroppedPage( self, bbox, relative=relative, strict=strict, crop_fn=utils.outside_bbox ) - # Reset, because this operation should not actually change bbox - p.bbox = self.bbox - return p - def filter(self, test_function: Callable[[T_obj], bool]) -> "FilteredPage": return FilteredPage(self, test_function) @@ -490,27 +486,31 @@ class CroppedPage(DerivedPage): def __init__( self, parent_page: Page, - bbox: T_bbox, + crop_bbox: T_bbox, crop_fn: Callable[[T_obj_list, T_bbox], T_obj_list] = utils.crop_to_bbox, relative: bool = False, strict: bool = True, ): if relative: o_x0, o_top, _, _ = parent_page.bbox - x0, top, x1, bottom = bbox - self.bbox = (x0 + o_x0, top + o_top, x1 + o_x0, bottom + o_top) - else: - self.bbox = bbox + x0, top, x1, bottom = crop_bbox + crop_bbox = (x0 + o_x0, top + o_top, x1 + o_x0, bottom + o_top) if strict: - test_proposed_bbox(self.bbox, parent_page.bbox) + test_proposed_bbox(crop_bbox, parent_page.bbox) def _crop_fn(objs: T_obj_list) -> T_obj_list: - return crop_fn(objs, bbox) + return crop_fn(objs, crop_bbox) + + super().__init__(parent_page) self._crop_fn = _crop_fn - super().__init__(parent_page) + # Note: testing for original function passed, not _crop_fn + if crop_fn is utils.outside_bbox: + self.bbox = parent_page.bbox + else: + self.bbox = crop_bbox @property def objects(self) -> Dict[str, T_obj_list]: diff --git a/tests/test_basics.py b/tests/test_basics.py index 30b2565c..7d8d2008 100644 --- a/tests/test_basics.py +++ b/tests/test_basics.py @@ -102,6 +102,14 @@ def test_relative_crop(self): (0.5 * float(bottom.width), 0, bottom.width, bottom.height), relative=True ) + # An extra test for issue #914, in which relative crops were + # using the wrong bboxes for 
cropping, leading to empty object-lists + crop_right = page.crop((page.width / 2, 0, page.width, page.height)) + crop_right_again_rel = crop_right.crop( + (0, 0, crop_right.width / 2, page.height), relative=True + ) + assert len(crop_right_again_rel.chars) + def test_invalid_crops(self): page = self.pdf.pages[0] with pytest.raises(ValueError):