Skip to content

Commit

Permalink
[2.x] utils module test suite (#82)
Browse files Browse the repository at this point in the history
* ADD `test_data_path()` func

* CUT password prompting from `Info.__init__` for encrypted pdfs
 - fix `Info().decrypted()` method

* simplify `Info().encrypted()` method

* add todo

* ADD utils module test suite with test data

* MAKE test_utils_path.py for testing `add_suffix()` function

* MAKE test_utils_read.py for testing `pypdf3_reader()` function

* reformat todo

* reformat todo

* CUT unused test function
  • Loading branch information
sfneal authored Jul 8, 2024
1 parent 0bccbe5 commit 6114847
Show file tree
Hide file tree
Showing 10 changed files with 251 additions and 18 deletions.
28 changes: 12 additions & 16 deletions pdfconduit/utils/info.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,24 +3,14 @@


class Info:
def __init__(self, path, password=None, prompt=True):
self.pdf = self._reader(path, password, prompt)
def __init__(self, path, password=None):
self.pdf = self._reader(path, password)

@staticmethod
def _reader(path, password, prompt):
def _reader(path, password):
"""Read PDF and decrypt if encrypted."""
pdf = PdfFileReader(path) if not isinstance(path, PdfFileReader) else path
# Check that PDF is encrypted
if pdf.isEncrypted:
# Check that password is none
if not password:
pdf.decrypt('')
# Try and decrypt PDF using no password, prompt for password
if pdf.isEncrypted and prompt:
print('No password has been given for encrypted PDF ', path)
password = input('Enter Password: ')
else:
return False
if password:
pdf.decrypt(password)
return pdf

Expand All @@ -32,12 +22,12 @@ def _resolved_objects(pdf, xobject):
@property
def encrypted(self):
"""Check weather a PDF is encrypted"""
return True if self.pdf.isEncrypted else False
return self.pdf.isEncrypted

@property
def decrypted(self):
"""Check weather a PDF is encrypted"""
return True if self.pdf.isDecrypted else False
return not self.encrypted

@property
def pages(self):
Expand All @@ -51,6 +41,7 @@ def metadata(self):

def resources(self):
"""Retrieve contents of each page of PDF"""
# todo: refactor to generator?
return [self.pdf.getPage(i) for i in range(self.pdf.getNumPages())]

@property
Expand All @@ -61,6 +52,8 @@ def security(self):
@property
def dimensions(self):
"""Get width and height of a PDF"""
# todo: add page parameter?
# todo: add height & width methods?
size = self.pdf.getPage(0).mediaBox
return {'w': float(size[2]), 'h': float(size[3])}

Expand All @@ -73,4 +66,7 @@ def size(self):
@property
def rotate(self):
"""Retrieve rotation info."""
# todo: add page param
# todo: refactor to `rotation()`
# todo: add is_rotated
return self._resolved_objects(self.pdf, '/Rotate')
2 changes: 1 addition & 1 deletion pdfconduit/utils/path.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
from pathlib import Path

if 'pathlib' in sys.modules:

def _add_suffix(file_path, suffix, sep, ext):
p = Path(file_path)
_ext = p.suffix if ext is None else str('.' + ext.strip('.'))
Expand All @@ -24,6 +23,7 @@ def add_suffix(file_path, suffix='modified', sep='_', ext=None):


def set_destination(source, suffix, filename=False, ext=None):
# todo: can be removed, not used
"""Create new pdf filename for temp files"""
source_dirname = os.path.dirname(source)

Expand Down
1 change: 1 addition & 0 deletions pdfconduit/utils/receipt.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@


class Receipt:
# todo: refactor to write to logs
def __init__(self, use=True, gui=False):
self.dst = None
self.use = use
Expand Down
1 change: 1 addition & 0 deletions pdfconduit/utils/write.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@


def overlay_pdfs(top_pdf, bottom_pdf, destination):
# todo: possibly remove?
"""
Overlay PDF objects to files
:param top_pdf: PDF object to be placed on top
Expand Down
7 changes: 6 additions & 1 deletion tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,9 @@
pdf_path = os.path.join(test_data_dir, pdf_name)
img_path = os.path.join(test_data_dir, img_name)

__all__ = ['pdf_path', 'img_path', 'test_data_dir']

def test_data_path(filename):
    """
    Build the path to a file stored in the test data directory.

    :param filename: Name of a file located inside ``test_data_dir``
    :return: ``test_data_dir`` joined with ``filename``
    """
    data_file_path = os.path.join(test_data_dir, filename)
    return data_file_path


__all__ = ['pdf_path', 'img_path', 'test_data_dir', 'test_data_path']
Binary file added tests/data/encrypted.pdf
Binary file not shown.
Binary file added tests/data/rotated.pdf
Binary file not shown.
143 changes: 143 additions & 0 deletions tests/test_utils_info.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
import unittest

from looptools import Timer

from pdfconduit import Info
from tests import *


class TestInfo(unittest.TestCase):
    """Tests for the properties and methods exposed by ``Info``.

    Every test builds its ``Info`` instance through ``_get_info`` so the
    test data lookup lives in a single place.
    """

    @Timer.decorator
    def test_pypdf3_is_encrypted(self):
        info = self._get_info('encrypted.pdf')

        self.assertTrue(info.encrypted)

    @Timer.decorator
    def test_pypdf3_is_decrypted(self):
        info = self._get_info('article.pdf')

        self.assertTrue(info.decrypted)

    @Timer.decorator
    def test_pypdf3_is_not_encrypted(self):
        info = self._get_info('article.pdf')

        self.assertFalse(info.encrypted)

    @Timer.decorator
    def test_pypdf3_is_not_decrypted(self):
        info = self._get_info('encrypted.pdf')

        self.assertFalse(info.decrypted)

    @Timer.decorator
    def test_pypdf3_pages(self):
        info = self._get_info('article.pdf')

        self.assertIsInstance(info.pages, int)
        self.assertEqual(info.pages, 1)
        self.assertEqual(self._get_info('document.pdf').pages, 11)

    @Timer.decorator
    def test_pypdf3_metadata(self):
        metadata = self._get_info('article.pdf').metadata

        self.assertIsInstance(metadata, dict)
        self.assertEqual(metadata['/Creator'], 'This PDF is created by PDF4U Pro 2.0')
        self.assertEqual(metadata['/CreationDate'], 'D:20040120105826')
        self.assertEqual(metadata['/Producer'], 'PDF4U Adobe PDF Creator 2.0')

    @Timer.decorator
    def test_pypdf3_resources(self):
        info = self._get_info('article.pdf')

        # resources() returns one entry per page
        self.assertEqual(info.pages, len(info.resources()))

        first_page = info.resources()[0]
        self.assertIsInstance(first_page, dict)
        self.assertEqual(first_page['/Type'], '/Page')

        mediabox = list(first_page['/MediaBox'])
        self.assertEqual(mediabox[0], 0)
        self.assertEqual(mediabox[1], 0)
        self.assertEqual(float(mediabox[2]), 595.276)
        self.assertEqual(float(mediabox[3]), 841.89)

    @Timer.decorator
    def test_pypdf3_security_encrypted_pdf(self):
        security = self._get_info('encrypted.pdf').security

        self.assertIsInstance(security, dict)
        self.assertEqual(security, {})

    @Timer.decorator
    def test_pypdf3_security_decrypted_pdf(self):
        security = self._get_info('encrypted.pdf', 'foo').security

        self.assertIsInstance(security, dict)
        # assertIn names the missing key on failure, unlike assertTrue(key in ...)
        for key in ('/V', '/R', '/Length', '/P', '/Filter', '/O', '/U'):
            self.assertIn(key, security)

    @Timer.decorator
    def test_pypdf3_security_passwordless_pdf(self):
        security = self._get_info('article.pdf').security

        self.assertIsInstance(security, dict)
        self.assertEqual(security, {})

    @Timer.decorator
    def test_pypdf3_dimensions(self):
        dimensions = self._get_info('article.pdf').dimensions

        self.assertIsInstance(dimensions, dict)
        self.assertIn('w', dimensions)
        self.assertIn('h', dimensions)
        self.assertIsInstance(dimensions['w'], float)
        self.assertIsInstance(dimensions['h'], float)
        self.assertEqual(dimensions['w'], 595.276)
        self.assertEqual(dimensions['h'], 841.89)

    @Timer.decorator
    def test_pypdf3_size(self):
        size = self._get_info('article.pdf').size

        self.assertIsInstance(size, tuple)
        self.assertEqual(len(size), 2)
        self.assertIsInstance(size[0], float)
        self.assertIsInstance(size[1], float)
        self.assertEqual(size[0], 595.276)
        self.assertEqual(size[1], 841.89)

    @Timer.decorator
    def test_pypdf3_size_and_dimensions_are_equal(self):
        info = self._get_info('article.pdf')

        self.assertEqual(info.size[0], info.dimensions['w'])
        self.assertEqual(info.size[1], info.dimensions['h'])

    @Timer.decorator
    def test_pypdf3_rotate_no_rotation(self):
        info = self._get_info('article.pdf')

        self.assertIsNone(info.rotate)

    @Timer.decorator
    def test_pypdf3_rotate_rotated(self):
        rotation = self._get_info('rotated.pdf').rotate

        self.assertIsInstance(rotation, int)
        self.assertEqual(rotation, 90)

    @staticmethod
    def _get_info(filename, password=None):
        """
        Create an ``Info`` instance for a file in the test data directory.

        :param filename: Name of a PDF inside the test data directory
        :param password: Optional password forwarded to ``Info``
        :return: ``Info`` instance for the requested file
        """
        return Info(test_data_path(filename), password=password)


if __name__ == '__main__':
unittest.main()
45 changes: 45 additions & 0 deletions tests/test_utils_path.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import os.path
import unittest

from looptools import Timer

from pdfconduit.utils import add_suffix, set_destination
from tests import *


class TestPath(unittest.TestCase):
    """Tests for ``add_suffix()`` using the path of the ``article.pdf`` test file."""

    @Timer.decorator
    def test_add_suffix(self):
        source = test_data_path('article.pdf')

        self._assert_suffixed(add_suffix(source), source, 'article_modified.pdf')

    @Timer.decorator
    def test_add_suffix_suffix(self):
        source = test_data_path('article.pdf')

        self._assert_suffixed(add_suffix(source, 'new'), source, 'article_new.pdf')

    @Timer.decorator
    def test_add_suffix_suffix_sep(self):
        source = test_data_path('article.pdf')

        self._assert_suffixed(add_suffix(source, 'old', '-'), source, 'article-old.pdf')

    @Timer.decorator
    def test_add_suffix_suffix_sep_ext(self):
        source = test_data_path('article.pdf')

        self._assert_suffixed(add_suffix(source, 'backup', '-', 'zip'), source, 'article-backup.zip')

    def _assert_suffixed(self, actual, source, expected_name):
        """
        Assert that ``actual`` is a str naming ``expected_name`` in the directory of ``source``.

        :param actual: Value returned by ``add_suffix()``
        :param source: Path that was passed to ``add_suffix()``
        :param expected_name: Expected file name of the result
        """
        expected = os.path.join(os.path.dirname(source), expected_name)

        self.assertIsInstance(actual, str)
        self.assertEqual(actual, expected)


if __name__ == '__main__':
unittest.main()
42 changes: 42 additions & 0 deletions tests/test_utils_read.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import unittest

from PyPDF3 import PdfFileReader
from PyPDF3.utils import PdfReadError
from looptools import Timer

from pdfconduit.utils import pypdf3_reader
from tests import *


class TestRead(unittest.TestCase):
    """Tests for ``pypdf3_reader()`` against unencrypted and encrypted test PDFs."""

    @Timer.decorator
    def test_pypdf3_reader_can_read_unencrypted(self):
        reader = pypdf3_reader(test_data_path('document.pdf'))

        self.assertIsInstance(reader, PdfFileReader)
        self.assertEqual(reader.getNumPages(), 11)

    @Timer.decorator
    def test_pypdf3_reader_cant_read_encrypted(self):
        reader = pypdf3_reader(test_data_path('encrypted.pdf'))

        # assertRaises already guarantees the exception type, so only the
        # message needs to be checked afterwards.
        with self.assertRaises(PdfReadError) as context:
            reader.getNumPages()

        self.assertEqual(str(context.exception), 'File has not been decrypted')

    @Timer.decorator
    def test_pypdf3_reader_can_read_encrypted_with_password(self):
        reader = pypdf3_reader(test_data_path('encrypted.pdf'), 'foo')

        self.assertIsInstance(reader, PdfFileReader)
        # isEncrypted is expected to stay True even after the password was supplied
        self.assertTrue(reader.isEncrypted)
        self.assertEqual(reader.getNumPages(), 11)


if __name__ == '__main__':
unittest.main()

0 comments on commit 6114847

Please sign in to comment.