Merge pull request #652 from BastianPoe/bugfix-651

Make consumer create target directories
the-paperless-project · Jan 5, 2021 · a261bba · a261bba
2 parents b4c585c + 008200d
commit a261bba
Show file tree

Hide file tree

Showing 10 changed files with 148 additions and 108 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -2,7 +2,7 @@ language: python
 
 before_install:
 - sudo apt-get update -qq
-- sudo apt-get install -qq libpoppler-cpp-dev unpaper tesseract-ocr
+- sudo apt-get install -qq libpoppler-cpp-dev unpaper tesseract-ocr imagemagick ghostscript optipng
 
 sudo: false
 

diff --git a/src/documents/consumer.py b/src/documents/consumer.py
@@ -236,6 +236,10 @@ def _store(self, text, doc, thumbnail, date):
             self.log("debug", "Tagging with {}".format(tag_names))
             document.tags.add(*relevant_tags)
 
+        # Create directory to store document in
+        document.create_source_directory()
+
+        # Save document and thumbnail
         self._write(document, doc, document.source_path)
         self._write(document, thumbnail, document.thumbnail_path)
 

diff --git a/src/documents/models.py b/src/documents/models.py
@@ -283,9 +283,14 @@ def find_renamed_document(self, subdirectory=""):
         if self.storage_type == self.STORAGE_TYPE_GPG:
             suffix += ".gpg"
 
-        # Go up in the directory hierarchy and try to delete all directories
+        # Start with the (optionally) supplied subdirectory, go up in the
+        # directory hierarchy and try to find the file in question
         root = os.path.normpath(Document.filename_to_path(subdirectory))
 
+        # Check if root really exists and return otherwise
+        if not os.path.isdir(root):
+            return None
+
         for filename in os.listdir(root):
             if filename.endswith(suffix):
                 return os.path.join(subdirectory, filename)

diff --git a/src/documents/parsers.py b/src/documents/parsers.py
@@ -39,14 +39,10 @@ class DocumentParser:
     `paperless_tesseract.parsers` for inspiration.
     """
 
-    SCRATCH = settings.SCRATCH_DIR
-    DATE_ORDER = settings.DATE_ORDER
-    FILENAME_DATE_ORDER = settings.FILENAME_DATE_ORDER
-    OPTIPNG = settings.OPTIPNG_BINARY
-
     def __init__(self, path):
         self.document_path = path
-        self.tempdir = tempfile.mkdtemp(prefix="paperless-", dir=self.SCRATCH)
+        self.tempdir = tempfile.mkdtemp(prefix="paperless-",
+                                        dir=settings.SCRATCH_DIR)
         self.logger = logging.getLogger(__name__)
         self.logging_group = None
 
@@ -60,7 +56,7 @@ def optimise_thumbnail(self, in_path):
 
         out_path = os.path.join(self.tempdir, "optipng.png")
 
-        args = (self.OPTIPNG, "-o5", in_path, "-out", out_path)
+        args = (settings.OPTIPNG_BINARY, "-o5", in_path, "-out", out_path)
         if not subprocess.Popen(args).wait() == 0:
             raise ParseError("Optipng failed at {}".format(args))
 
@@ -101,13 +97,13 @@ def __parser(ds, date_order):
         title = os.path.basename(self.document_path)
 
         # if filename date parsing is enabled, search there first:
-        if self.FILENAME_DATE_ORDER:
+        if settings.FILENAME_DATE_ORDER:
             self.log("info", "Checking document title for date")
             for m in re.finditer(DATE_REGEX, title):
                 date_string = m.group(0)
 
                 try:
-                    date = __parser(date_string, self.FILENAME_DATE_ORDER)
+                    date = __parser(date_string, settings.FILENAME_DATE_ORDER)
                 except (TypeError, ValueError):
                     # Skip all matches that do not parse to a proper date
                     continue
@@ -133,7 +129,7 @@ def __parser(ds, date_order):
             date_string = m.group(0)
 
             try:
-                date = __parser(date_string, self.DATE_ORDER)
+                date = __parser(date_string, settings.DATE_ORDER)
             except (TypeError, ValueError):
                 # Skip all matches that do not parse to a proper date
                 continue

diff --git a/src/documents/tests/samples/letter.pdf b/src/documents/tests/samples/letter.pdf
diff --git a/src/documents/tests/test_consumer.py b/src/documents/tests/test_consumer.py
@@ -1,14 +1,89 @@
 import re
+import os
+import shutil
 
-from django.test import TestCase
-from unittest import mock
+from django.conf import settings
+from django.test import TestCase, override_settings
 from tempfile import TemporaryDirectory
+from unittest import mock
 
 from ..consumer import Consumer
 from ..models import FileInfo, Tag
 
 
 class TestConsumer(TestCase):
+    SAMPLE_FILES = os.path.join(os.path.dirname(__file__), "samples")
+
+    def setUp(self):
+        self.storage = TemporaryDirectory()
+        os.makedirs(os.path.join(self.storage.name, "documents", "originals"),
+                    exist_ok=True)
+        os.makedirs(os.path.join(self.storage.name, "documents", "thumbnails"),
+                    exist_ok=True)
+        self.storage_override = override_settings(MEDIA_ROOT=self.storage.name)
+        self.storage_override.enable()
+
+        self.tmpdir = TemporaryDirectory()
+        self.tmpdir_override = override_settings(
+                CONVERT_TMPDIR=self.tmpdir.name)
+        self.tmpdir_override.enable()
+
+        self.scratchdir = TemporaryDirectory()
+        self.scratchdir_override = override_settings(
+                SCRATCH_DIR=self.scratchdir.name)
+        self.scratchdir_override.enable()
+
+        self.consumptiondir = TemporaryDirectory()
+        self.consumptiondir_override = override_settings(
+                CONSUMPTION_DIR=self.consumptiondir.name)
+        self.consumptiondir_override.enable()
+
+    def tearDown(self):
+        self.storage.cleanup()
+        self.storage_override.disable()
+        self.tmpdir.cleanup()
+        self.tmpdir_override.disable()
+        self.scratchdir.cleanup()
+        self.scratchdir_override.disable()
+        self.consumptiondir.cleanup()
+        self.consumptiondir_override.disable()
+
+    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/{title}")
+    def test_file_consumption(self):
+        myConsumer = Consumer(consume=settings.CONSUMPTION_DIR,
+                              scratch=settings.SCRATCH_DIR)
+
+        # Put sample document into consumption folder
+        shutil.copyfile(os.path.join(self.SAMPLE_FILES, "letter.pdf"),
+                        os.path.join(settings.CONSUMPTION_DIR, "letter.pdf"))
+
+        myConsumer.consume_new_files()
+
+        # Check if consumed file has been stored correctly
+        self.assertEqual(os.path.isfile(os.path.join(
+            settings.MEDIA_ROOT, "documents", "originals", "none",
+            "letter-0000001.pdf.gpg")), True)
+
+    @override_settings(PAPERLESS_FILENAME_FORMAT="{correspondent}/dummy")
+    def test_duplicate_file_consumption(self):
+        myConsumer = Consumer(consume=settings.CONSUMPTION_DIR,
+                              scratch=settings.SCRATCH_DIR)
+
+        # Put sample document into consumption folder
+        shutil.copyfile(os.path.join(self.SAMPLE_FILES, "letter.pdf"),
+                        os.path.join(settings.CONSUMPTION_DIR, "letter.pdf"))
+        shutil.copyfile(os.path.join(self.SAMPLE_FILES, "letter.pdf"),
+                        os.path.join(settings.CONSUMPTION_DIR, "letter2.pdf"))
+
+        myConsumer.consume_new_files()
+
+        # Check if consumed file has been stored correctly
+        self.assertEqual(os.path.isfile(os.path.join(
+            settings.MEDIA_ROOT, "documents", "originals", "none",
+            "dummy-0000001.pdf.gpg")), True)
+        self.assertEqual(os.path.isfile(os.path.join(
+            settings.MEDIA_ROOT, "documents", "originals", "none",
+            "dummy-0000002.pdf.gpg")), False)
 
     class DummyParser(object):
         pass