Skip to content
This repository has been archived by the owner on Feb 19, 2021. It is now read-only.

Commit

Permalink
Merge pull request #652 from BastianPoe/bugfix-651
Browse files Browse the repository at this point in the history
Make consumer create target directories
  • Loading branch information
MasterofJOKers authored Jan 5, 2021
2 parents b4c585c + 008200d commit a261bba
Show file tree
Hide file tree
Showing 10 changed files with 148 additions and 108 deletions.
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ language: python

before_install:
- sudo apt-get update -qq
- sudo apt-get install -qq libpoppler-cpp-dev unpaper tesseract-ocr
- sudo apt-get install -qq libpoppler-cpp-dev unpaper tesseract-ocr imagemagick ghostscript optipng

sudo: false

Expand Down
4 changes: 4 additions & 0 deletions src/documents/consumer.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,6 +236,10 @@ def _store(self, text, doc, thumbnail, date):
self.log("debug", "Tagging with {}".format(tag_names))
document.tags.add(*relevant_tags)

# Create directory to store document in
document.create_source_directory()

# Save document and thumbnail
self._write(document, doc, document.source_path)
self._write(document, thumbnail, document.thumbnail_path)

Expand Down
7 changes: 6 additions & 1 deletion src/documents/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,9 +283,14 @@ def find_renamed_document(self, subdirectory=""):
if self.storage_type == self.STORAGE_TYPE_GPG:
suffix += ".gpg"

# Go up in the directory hierarchy and try to delete all directories
# Start with the (optionally) supplied subdirectory, go up in the
# directory hierarchy and try to find the file in question
root = os.path.normpath(Document.filename_to_path(subdirectory))

# Check if root really exists and return otherwise
if not os.path.isdir(root):
return None

for filename in os.listdir(root):
if filename.endswith(suffix):
return os.path.join(subdirectory, filename)
Expand Down
16 changes: 6 additions & 10 deletions src/documents/parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,14 +39,10 @@ class DocumentParser:
`paperless_tesseract.parsers` for inspiration.
"""

SCRATCH = settings.SCRATCH_DIR
DATE_ORDER = settings.DATE_ORDER
FILENAME_DATE_ORDER = settings.FILENAME_DATE_ORDER
OPTIPNG = settings.OPTIPNG_BINARY

def __init__(self, path):
self.document_path = path
self.tempdir = tempfile.mkdtemp(prefix="paperless-", dir=self.SCRATCH)
self.tempdir = tempfile.mkdtemp(prefix="paperless-",
dir=settings.SCRATCH_DIR)
self.logger = logging.getLogger(__name__)
self.logging_group = None

Expand All @@ -60,7 +56,7 @@ def optimise_thumbnail(self, in_path):

out_path = os.path.join(self.tempdir, "optipng.png")

args = (self.OPTIPNG, "-o5", in_path, "-out", out_path)
args = (settings.OPTIPNG_BINARY, "-o5", in_path, "-out", out_path)
if not subprocess.Popen(args).wait() == 0:
raise ParseError("Optipng failed at {}".format(args))

Expand Down Expand Up @@ -101,13 +97,13 @@ def __parser(ds, date_order):
title = os.path.basename(self.document_path)

# if filename date parsing is enabled, search there first:
if self.FILENAME_DATE_ORDER:
if settings.FILENAME_DATE_ORDER:
self.log("info", "Checking document title for date")
for m in re.finditer(DATE_REGEX, title):
date_string = m.group(0)

try:
date = __parser(date_string, self.FILENAME_DATE_ORDER)
date = __parser(date_string, settings.FILENAME_DATE_ORDER)
except (TypeError, ValueError):
# Skip all matches that do not parse to a proper date
continue
Expand All @@ -133,7 +129,7 @@ def __parser(ds, date_order):
date_string = m.group(0)

try:
date = __parser(date_string, self.DATE_ORDER)
date = __parser(date_string, settings.DATE_ORDER)
except (TypeError, ValueError):
# Skip all matches that do not parse to a proper date
continue
Expand Down
Binary file added src/documents/tests/samples/letter.pdf
Binary file not shown.
79 changes: 77 additions & 2 deletions src/documents/tests/test_consumer.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,89 @@
import re
import os
import shutil

from django.test import TestCase
from unittest import mock
from django.conf import settings
from django.test import TestCase, override_settings
from tempfile import TemporaryDirectory
from unittest import mock

from ..consumer import Consumer
from ..models import FileInfo, Tag


class TestConsumer(TestCase):
SAMPLE_FILES = os.path.join(os.path.dirname(__file__), "samples")

def setUp(self):
    """Redirect all storage-related settings to throwaway directories.

    Every override is enabled by hand here, so each one has to be
    disabled again in tearDown.
    """
    # Media storage, pre-populated with the sub-directories for
    # originals and thumbnails
    self.storage = TemporaryDirectory()
    documents_root = os.path.join(self.storage.name, "documents")
    for leaf in ("originals", "thumbnails"):
        os.makedirs(os.path.join(documents_root, leaf), exist_ok=True)
    self.storage_override = override_settings(
        MEDIA_ROOT=self.storage.name)
    self.storage_override.enable()

    # Temporary directory used for conversion
    self.tmpdir = TemporaryDirectory()
    self.tmpdir_override = override_settings(
        CONVERT_TMPDIR=self.tmpdir.name)
    self.tmpdir_override.enable()

    # Scratch directory
    self.scratchdir = TemporaryDirectory()
    self.scratchdir_override = override_settings(
        SCRATCH_DIR=self.scratchdir.name)
    self.scratchdir_override.enable()

    # Directory the consumer picks new files up from
    self.consumptiondir = TemporaryDirectory()
    self.consumptiondir_override = override_settings(
        CONSUMPTION_DIR=self.consumptiondir.name)
    self.consumptiondir_override.enable()

def tearDown(self):
    """Undo the settings overrides from setUp and remove the temp dirs.

    override_settings.enable() remembers the settings object that was
    active when it was called and disable() puts exactly that object
    back. The overrides are therefore disabled in the reverse order of
    setUp; disabling them in the order they were enabled would leave
    all but the last override active after the test.
    """
    # Restore the settings first (last enabled, first disabled) so that
    # a failing directory cleanup cannot leave overrides behind.
    self.consumptiondir_override.disable()
    self.scratchdir_override.disable()
    self.tmpdir_override.disable()
    self.storage_override.disable()

    # Now delete the temporary directories themselves
    self.consumptiondir.cleanup()
    self.scratchdir.cleanup()
    self.tmpdir.cleanup()
    self.storage.cleanup()

@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
def test_file_consumption(self):
    """A consumed sample PDF ends up below the originals directory."""
    consumer = Consumer(consume=settings.CONSUMPTION_DIR,
                        scratch=settings.SCRATCH_DIR)

    # Drop the sample document into the consumption folder
    sample = os.path.join(self.SAMPLE_FILES, "letter.pdf")
    incoming = os.path.join(settings.CONSUMPTION_DIR, "letter.pdf")
    shutil.copyfile(sample, incoming)

    consumer.consume_new_files()

    # The document must now exist at its expected storage location
    stored = os.path.join(
        settings.MEDIA_ROOT, "documents", "originals", "none",
        "letter-0000001.pdf.gpg")
    self.assertTrue(os.path.isfile(stored))

@override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/dummy")
def test_duplicate_file_consumption(self):
    """Two copies of the same sample PDF yield a single stored file."""
    consumer = Consumer(consume=settings.CONSUMPTION_DIR,
                        scratch=settings.SCRATCH_DIR)

    # Put the same sample document into the consumption folder twice
    sample = os.path.join(self.SAMPLE_FILES, "letter.pdf")
    for target_name in ("letter.pdf", "letter2.pdf"):
        shutil.copyfile(
            sample, os.path.join(settings.CONSUMPTION_DIR, target_name))

    consumer.consume_new_files()

    # Only the first numbered file may exist in the storage directory
    originals = os.path.join(
        settings.MEDIA_ROOT, "documents", "originals", "none")
    first = os.path.join(originals, "dummy-0000001.pdf.gpg")
    second = os.path.join(originals, "dummy-0000002.pdf.gpg")
    self.assertTrue(os.path.isfile(first))
    self.assertFalse(os.path.isfile(second))

class DummyParser(object):
pass
Expand Down
Loading

0 comments on commit a261bba

Please sign in to comment.