Skip to content

Support newer version of PyPDF2 -- also format with black #8

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 38 additions & 31 deletions inject_javascript/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
from tempfile import mkstemp

## From third-party libraries
from PyPDF2 import PdfFileWriter
from PyPDF2 import PdfFileReader
from PyPDF2 import PdfWriter
from PyPDF2 import PdfReader

## From internal libraries
from inject_javascript.lib import notice, error
Expand All @@ -39,96 +39,103 @@
class Inject_JavaScript(object):
"""Injects some JavaScript into the PDF."""

def __init__(self, clobber = False, escape = False, verbose = 0):
def __init__(self, clobber=False, escape=False, verbose=0):
self.clobber = clobber
self.escape = escape
self.verbose = verbose

def return_pdf_data(self, pdf_path = None):
def return_pdf_data(self, pdf_path=None):
if pdf_path is None:
error('no "pdf_path" defined')

## Setup output data object and load source PDF data
writable_pdf_data = PdfFileWriter()
with open(pdf_path, 'rb') as fb:
source_pdf_data = PdfFileReader(fb)
writable_pdf_data = PdfWriter()
with open(pdf_path, "rb") as fb:
source_pdf_data = PdfReader(fb)
## Load source PDF into output object
for i in range(source_pdf_data.getNumPages()):
writable_pdf_data.addPage(source_pdf_data.getPage(i))
for i in range(len(source_pdf_data.pages)):
writable_pdf_data.add_page(source_pdf_data.pages[i])

if self.verbose and self.verbose > 0:
notice('finished loading data into output object from: {0}'.format(pdf_path))
notice(
"finished loading data into output object from: {0}".format(pdf_path)
)

## Return output object to calling process
return writable_pdf_data

def return_js_data(self, js_path = None):
def return_js_data(self, js_path=None):
if js_path is None:
error('no "js_path" defined')

# Loads JavaScript file into memory
with open(js_path, 'r') as fb:
with open(js_path, "r") as fb:
js_data = fb.read()

# Add more replace lines if needed; so far, newlines have been the difficult characters
if self.escape is True:
js_data = js_data.replace('\\', '\\\\')
message = 'escaped special characters prior to returning JavaScript data from: '
js_data = js_data.replace("\\", "\\\\")
message = (
"escaped special characters prior to returning JavaScript data from: "
)
else:
message = 'finished loading JavaScript data from: '
message = "finished loading JavaScript data from: "

if self.verbose and self.verbose > 0:
notice(message + js_path)

return js_data

def save_combined_data(self, pdf_data = None, js_data = None, pdf_path = None, save_path = None):
def save_combined_data(
self, pdf_data=None, js_data=None, pdf_path=None, save_path=None
):
## See: https://security.openstack.org/guidelines/dg_using-temporary-files-securely.html
## for the guideline that inspired the following
fd, tmp_path = mkstemp()
try:
## Write to a tempfile, note 'b' is there to avoid warnings during write process
with os_fdopen(fd, 'wb') as tmp:
pdf_data.addJS(js_data)
with os_fdopen(fd, "wb") as tmp:
pdf_data.add_js(js_data)
pdf_data.write(tmp)

## Figure out where to save the temp file for the calling process
if self.clobber is True:
save_path = pdf_path
copyfile(tmp_path, save_path)
message = 'overwrote exsisting: '
message = "overwrote exsisting: "
else:
if save_path is None:
save_path = path_join(dirname(tmp_path), basename(pdf_path))
copyfile(tmp_path, save_path)
message = 'enhanced file path: '
message = "enhanced file path: "

finally:
## Clean up and return saved file path or throw error messages about failures
if path_exists(tmp_path) is True:
os_remove(tmp_path)
else:
error('Unable to remove temp path: {0}'.format(tmp_path))
error("Unable to remove temp path: {0}".format(tmp_path))

if path_exists(save_path) is True:
if self.verbose and self.verbose > 0:
notice(message + save_path)
return save_path
else:
error('Unable to make save path: {0}'.format(save_path))
error("Unable to make save path: {0}".format(save_path))

def inject_pdf_with_javascript(self, pdf_path = None, js_path = None, save_path = None):
pdf_data = self.return_pdf_data(pdf_path = pdf_path)
def inject_pdf_with_javascript(self, pdf_path=None, js_path=None, save_path=None):
pdf_data = self.return_pdf_data(pdf_path=pdf_path)

js_data = self.return_js_data(js_path = js_path)
js_data = self.return_js_data(js_path=js_path)

resulting_path = self.save_combined_data(pdf_data = pdf_data,
js_data = js_data,
pdf_path = pdf_path,
save_path = save_path)
resulting_path = self.save_combined_data(
pdf_data=pdf_data, js_data=js_data, pdf_path=pdf_path, save_path=save_path
)

return resulting_path


if __name__ == '__main__':
raise Exception('Try importing `Inject_JavaScript` class from a Python script or shell instead')
if __name__ == "__main__":
raise Exception(
"Try importing `Inject_JavaScript` class from a Python script or shell instead"
)