Skip to content
This repository has been archived by the owner on Feb 19, 2021. It is now read-only.

Commit

Permalink
fix: allow for caps in file name suffixes #206
Browse files Browse the repository at this point in the history
@schinkelg ran afoul of this one, and I took the opportunity to add a
test to catch this sort of thing next time.
  • Loading branch information
danielquinn committed Mar 28, 2017
1 parent 5b88ebf commit fa4924d
Show file tree
Hide file tree
Showing 5 changed files with 102 additions and 3 deletions.
5 changes: 5 additions & 0 deletions docs/changelog.rst
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
Changelog
#########

* 0.4.1
* Fix for `#206`_ wherein the pluggable parser didn't recognise files with
all-caps suffixes like ``.PDF``

* 0.4.0
* Introducing reminders. See `#199`_ for more information, but the short
explanation is that you can now attach simple notes & times to documents
Expand Down Expand Up @@ -211,3 +215,4 @@ Changelog
.. _#179: https://github.com/danielquinn/paperless/pull/179
.. _#199: https://github.com/danielquinn/paperless/issues/199
.. _#200: https://github.com/danielquinn/paperless/issues/200
.. _#206: https://github.com/danielquinn/paperless/issues/206
12 changes: 11 additions & 1 deletion src/documents/consumer.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def consume(self):
parser_class = self._get_parser_class(doc)
if not parser_class:
self.log(
"info", "No parsers could be found for {}".format(doc))
"error", "No parsers could be found for {}".format(doc))
self._ignore.append(doc)
continue

Expand Down Expand Up @@ -160,6 +160,16 @@ def _get_parser_class(self, doc):
if result:
options.append(result)

self.log(
"info",
"Parsers available: {}".format(
", ".join([str(o["parser"].__name__) for o in options])
)
)

if not options:
return None

# Return the parser with the highest weight.
return sorted(
options, key=lambda _: _["weight"], reverse=True)[0]["parser"]
Expand Down
48 changes: 48 additions & 0 deletions src/documents/tests/test_consumer.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,56 @@
from django.test import TestCase
from unittest import mock

from ..consumer import Consumer
from ..models import FileInfo


class TestConsumer(TestCase):
    """Checks which parser ``Consumer._get_parser_class`` hands back.

    Each scenario patches ``document_consumer_declaration.send`` (as seen
    from ``documents.consumer``) so that it returns hand-made
    ``(receiver, callable)`` pairs, and patches ``os.makedirs`` and
    ``os.path.exists`` while the ``Consumer`` is being constructed --
    presumably so construction needs no real directories (confirm against
    ``Consumer.__init__``).
    """

    class DummyParser(object):
        pass

    def test__get_parser_class_1_parser(self):
        # The patches applied by ``_get_consumer`` are only active while the
        # consumer is built; the lookup below runs after they are undone.
        consumer = self._get_consumer()
        self.assertEqual(
            consumer._get_parser_class("doc.pdf"), self.DummyParser)

    @mock.patch("documents.consumer.os.makedirs")
    @mock.patch("documents.consumer.os.path.exists", return_value=True)
    @mock.patch("documents.consumer.document_consumer_declaration.send")
    def test__get_parser_class_n_parsers(self, send_mock, *args):
        # Two declarations with different weights: the heavier one must win.

        class LightParser(object):
            pass

        class HeavyParser(object):
            pass

        light = (None, lambda _: {"weight": 0, "parser": LightParser})
        heavy = (None, lambda _: {"weight": 1, "parser": HeavyParser})
        send_mock.return_value = (light, heavy)

        chosen = Consumer()._get_parser_class("doc.pdf")
        self.assertEqual(chosen, HeavyParser)

    @mock.patch("documents.consumer.os.makedirs")
    @mock.patch("documents.consumer.os.path.exists", return_value=True)
    @mock.patch("documents.consumer.document_consumer_declaration.send")
    def test__get_parser_class_0_parsers(self, send_mock, *args):
        # The only declaration declines the document by returning None.
        declining = (None, lambda _: None)
        send_mock.return_value = (declining,)

        chosen = Consumer()._get_parser_class("doc.pdf")
        self.assertIsNone(chosen)

    @mock.patch("documents.consumer.os.makedirs")
    @mock.patch("documents.consumer.os.path.exists", return_value=True)
    @mock.patch("documents.consumer.document_consumer_declaration.send")
    def _get_consumer(self, send_mock, *args):
        # Helper: build a Consumer whose single declaration offers
        # ``DummyParser`` with weight 0.
        only = (None, lambda _: {"weight": 0, "parser": self.DummyParser})
        send_mock.return_value = (only,)
        return Consumer()


class TestAttributes(TestCase):

TAGS = ("tag1", "tag2", "tag3")
Expand Down
4 changes: 2 additions & 2 deletions src/paperless_tesseract/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

class ConsumerDeclaration(object):

MATCHING_FILES = re.compile("^.*\.(pdf|jpg|gif|png|tiff|pnm|bmp)$")
MATCHING_FILES = re.compile("^.*\.(pdf|jpg|gif|png|tiff?|pnm|bmp)$")

@classmethod
def handle(cls, sender, **kwargs):
Expand All @@ -14,7 +14,7 @@ def handle(cls, sender, **kwargs):
@classmethod
def test(cls, doc):

if cls.MATCHING_FILES.match(doc):
if cls.MATCHING_FILES.match(doc.lower()):
return {
"parser": RasterisedDocumentParser,
"weight": 0
Expand Down
36 changes: 36 additions & 0 deletions src/paperless_tesseract/tests/test_signals.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
from django.test import TestCase

from ..signals import ConsumerDeclaration


class SignalsTestCase(TestCase):
    """Exercises ``ConsumerDeclaration.test`` with a range of file names."""

    def test_test_handles_various_file_names_true(self):
        # Every stem/suffix combination below must yield a truthy result,
        # whether the suffix is lower-case, upper-case or mixed-case.
        stems = (
            "doc", "My Document", "Μυ Γρεεκ Δοψθμεντ", "Doc -with - tags",
            "A document with a . in it", "Doc with -- in it"
        )
        extensions = (
            "pdf", "jpg", "gif", "png", "tiff", "tif", "pnm", "bmp",
            "PDF", "JPG", "GIF", "PNG", "TIFF", "TIF", "PNM", "BMP",
            "pDf", "jPg", "gIf", "pNg", "tIff", "tIf", "pNm", "bMp",
        )

        candidates = [
            "{}.{}".format(stem, extension)
            for stem in stems
            for extension in extensions
        ]
        for candidate in candidates:
            self.assertTrue(ConsumerDeclaration.test(candidate))

    def test_test_handles_various_file_names_false(self):
        # Unsupported suffixes, an empty suffix ("doc."), the empty string
        # and a name with no suffix at all must each yield a falsy result.
        stems = ("doc",)
        extensions = ("txt", "markdown", "",)

        candidates = [
            "{}.{}".format(stem, extension)
            for stem in stems
            for extension in extensions
        ]
        candidates.extend(["", "doc"])

        for candidate in candidates:
            self.assertFalse(ConsumerDeclaration.test(candidate))

0 comments on commit fa4924d

Please sign in to comment.