Skip to content

Commit

Permalink
Shift to making pdfplumber.open the sole loader
Browse files Browse the repository at this point in the history
- .from_path is now removed
- .load is marked as deprecated, to be removed in 0.6.0
  • Loading branch information
jsvine committed Jul 26, 2020
1 parent ad94ed8 commit 00e789b
Show file tree
Hide file tree
Showing 11 changed files with 79 additions and 38 deletions.
9 changes: 5 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,13 @@ with pdfplumber.open("path/to/file.pdf") as pdf:

### Loading a PDF

`pdfplumber` provides two main ways to load a PDF:
To start working with a PDF, call `pdfplumber.open(x)`, where `x` can be a:

- `pdfplumber.open("path/to/file.pdf")`
- `pdfplumber.load(file_like_object)`
- path to your PDF file
- file object, opened in binary mode (e.g., `open("path/to/file.pdf", "rb")`)
- file-like object that yields bytes (e.g., `io.BytesIO`)

Both methods return an instance of the `pdfplumber.PDF` class.
The `open` method returns an instance of the `pdfplumber.PDF` class.

To load a password-protected PDF, pass the `password` keyword argument, e.g., `pdfplumber.open("file.pdf", password="test")`.

Expand Down
18 changes: 13 additions & 5 deletions pdfplumber/__init__.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,27 @@
__all__ = [
"__version__",
"utils",
"pdfminer",
"open",
"set_debug",
]

from ._version import __version__
from .pdf import PDF
from . import utils
import pdfminer
import pdfminer.pdftypes
from ._version import __version__
import sys

pdfminer.pdftypes.STRICT = False
pdfminer.pdfinterp.STRICT = False

open = PDF.open

def load(file_or_buffer, **kwargs):
sys.stderr.write("Warning: pdfplumber.load is deprecated. Please use pdfplumber.open (with same arguments) instead.\n")
return PDF(file_or_buffer, **kwargs)

open = PDF.open
# Old idiom
from_path = PDF.open

def set_debug(debug=0):
pdfminer.debug = debug

Expand Down
2 changes: 1 addition & 1 deletion pdfplumber/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def get_page_data(page):

def main():
args = parse_args()
pdf = pdfplumber.load(args.infile, pages=args.pages)
pdf = pdfplumber.open(args.infile, pages=args.pages)
if args.format == "csv":
to_csv(pdf, args.types, args.encoding)
else:
Expand Down
8 changes: 6 additions & 2 deletions pdfplumber/pdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from .page import Page
from .utils import decode_text

import pathlib
from pdfminer.pdfparser import PDFParser
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdfpage import PDFPage
Expand Down Expand Up @@ -44,8 +45,11 @@ def __init__(self,
self.interpreter = PDFPageInterpreter(rsrcmgr, self.device)

@classmethod
def open(cls, path, **kwargs):
return cls(open(path, "rb"), **kwargs)
def open(cls, path_or_fp, **kwargs):
if isinstance(path_or_fp, (str, pathlib.Path)):
return cls(open(path_or_fp, "rb"), **kwargs)
else:
return cls(path_or_fp, **kwargs)

def process_page(self, page):
self.interpreter.process_page(page)
Expand Down
23 changes: 13 additions & 10 deletions tests/test_basics.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,14 @@

class Test(unittest.TestCase):

def setUp(self):
@classmethod
def setup_class(self):
path = os.path.join(HERE, "pdfs/nics-background-checks-2015-11.pdf")
self.pdf = pdfplumber.from_path(path)
self.pdf = pdfplumber.open(path)

@classmethod
def teardown_class(self):
self.pdf.close()

def test_metadata(self):
metadata = self.pdf.metadata
Expand All @@ -39,17 +44,15 @@ def test(obj):
assert(len(step_2.rects) == 0)

def test_rotation(self):
rotated = pdfplumber.from_path(
os.path.join(HERE, "pdfs/nics-background-checks-2015-11-rotated.pdf")
)
assert(self.pdf.pages[0].width == 1008)
assert(self.pdf.pages[0].height == 612)
path = os.path.join(HERE, "pdfs/nics-background-checks-2015-11-rotated.pdf")
with pdfplumber.open(path) as rotated:
assert(rotated.pages[0].width == 612)
assert(rotated.pages[0].height == 1008)

assert(rotated.pages[0].width == 612)
assert(rotated.pages[0].height == 1008)

assert(rotated.pages[0].cropbox == self.pdf.pages[0].cropbox)
assert(rotated.pages[0].bbox != self.pdf.pages[0].bbox)
assert(rotated.pages[0].cropbox == self.pdf.pages[0].cropbox)
assert(rotated.pages[0].bbox != self.pdf.pages[0].bbox)

def test_password(self):
path = os.path.join(HERE, "pdfs/password-example.pdf")
Expand Down
9 changes: 7 additions & 2 deletions tests/test_ca_warn_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,16 @@ def fix_row_spaces(row):

class Test(unittest.TestCase):

def setUp(self):
@classmethod
def setup_class(self):
path = os.path.join(HERE, "pdfs/WARN-Report-for-7-1-2015-to-03-25-2016.pdf")
self.pdf = pdfplumber.from_path(path)
self.pdf = pdfplumber.open(path)
self.PDF_WIDTH = self.pdf.pages[0].width

@classmethod
def teardown_class(self):
self.pdf.close()

def test_pandas(self):

rect_x0_clusters = utils.cluster_list([ r["x0"]
Expand Down
7 changes: 6 additions & 1 deletion tests/test_display.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,16 @@

class Test(unittest.TestCase):

def setUp(self):
@classmethod
def setup_class(self):
path = os.path.join(HERE, "pdfs/nics-background-checks-2015-11.pdf")
self.pdf = pdfplumber.open(path)
self.im = self.pdf.pages[0].to_image()

@classmethod
def teardown_class(self):
self.pdf.close()

def test_basic_conversion(self):
self.im.reset()
self.im.draw_rect(self.im.page.rects[0])
Expand Down
18 changes: 12 additions & 6 deletions tests/test_issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def test_issue_13(self):
"""
Test slightly simplified from gist here: https://github.com/jsvine/pdfplumber/issues/13
"""
pdf = pdfplumber.from_path(
pdf = pdfplumber.open(
os.path.join(HERE, "pdfs/issue-13-151201DSP-Fond-581-90D.pdf")
)

Expand Down Expand Up @@ -72,36 +72,42 @@ def determine_if_checked(checkbox, curve_list):
for rect in rects ])

assert(n_checked == 5)
pdf.close()

def test_issue_14(self):
pdf = pdfplumber.from_path(
pdf = pdfplumber.open(
os.path.join(HERE, "pdfs/cupertino_usd_4-6-16.pdf")
)
assert len(pdf.objects)
pdf.close()

def test_issue_21(self):
pdf = pdfplumber.from_path(
pdf = pdfplumber.open(
os.path.join(HERE, "pdfs/150109DSP-Milw-505-90D.pdf")
)
assert len(pdf.objects)
pdf.close()

def test_issue_33(self):
pdf = pdfplumber.from_path(
pdf = pdfplumber.open(
os.path.join(HERE, "pdfs/issue-33-lorem-ipsum.pdf")
)
assert len(pdf.metadata.keys())
pdf.close()

def test_issue_53(self):
pdf = pdfplumber.from_path(
pdf = pdfplumber.open(
os.path.join(HERE, "pdfs/issue-53-example.pdf")
)
assert len(pdf.objects)
pdf.close()

def test_issue_67(self):
pdf = pdfplumber.from_path(
pdf = pdfplumber.open(
os.path.join(HERE, "pdfs/issue-67-example.pdf")
)
assert len(pdf.metadata.keys())
pdf.close()

def test_pr_77(self):
# via https://github.com/jsvine/pdfplumber/pull/77
Expand Down
10 changes: 7 additions & 3 deletions tests/test_la_precinct_bulletin.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,12 +100,16 @@ def to_dict(self):
}

class Test(unittest.TestCase):

def setUp(self):
@classmethod
def setup_class(self):
path = os.path.join(HERE, "pdfs/la-precinct-bulletin-2014-p1.pdf")
self.pdf = pdfplumber.from_path(path)
self.pdf = pdfplumber.open(path)
self.PDF_WIDTH = self.pdf.pages[0].width

@classmethod
def teardown_class(self):
self.pdf.close()

def test_pandas(self):
p1 = PrecinctPage(self.pdf.pages[0]).to_dict()
assert(p1["registered_voters"] == 1100)
Expand Down
3 changes: 2 additions & 1 deletion tests/test_list_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,4 +12,5 @@ class Test(unittest.TestCase):

def test_load(self):
path = os.path.join(HERE, "pdfs/cupertino_usd_4-6-16.pdf")
pdf = pdfplumber.from_path(path)
with pdfplumber.open(path) as pdf:
assert len(pdf.metadata)
10 changes: 7 additions & 3 deletions tests/test_nics_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,16 @@
]

class Test(unittest.TestCase):

def setUp(self):
@classmethod
def setup_class(self):
path = os.path.join(HERE, "pdfs/nics-background-checks-2015-11.pdf")
self.pdf = pdfplumber.from_path(path)
self.pdf = pdfplumber.open(path)
self.PDF_WIDTH = self.pdf.pages[0].width

@classmethod
def teardown_class(self):
self.pdf.close()

def test_plain(self):
page = self.pdf.pages[0]
cropped = page.crop((0, 80, self.PDF_WIDTH, 485))
Expand Down

0 comments on commit 00e789b

Please sign in to comment.