Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Making FPDF.output() 100 times faster by using a bytearray buffer #164

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
137 changes: 78 additions & 59 deletions fpdf/fpdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,17 +15,18 @@

from __future__ import division, with_statement

from contextlib import contextmanager
from datetime import datetime
from functools import wraps
import math
import errno
import os, sys, zlib, struct, re, tempfile, struct
import errno, logging, math, os, re, struct, sys, tempfile, zlib

from .ttfonts import TTFontFile
from .fonts import fpdf_charwidths
from .php import substr, sprintf, print_r, UTF8ToUTF16BE, UTF8StringToArray
from .py3k import PY3K, pickle, urlopen, BytesIO, Image, basestring, unicode, exception, b, hashpath

LOGGER = logging.getLogger(__name__)

# Global variables
FPDF_VERSION = '1.7.2'
FPDF_FONT_DIR = os.path.join(os.path.dirname(__file__),'font')
Expand Down Expand Up @@ -64,7 +65,7 @@ def __init__(self, orientation = 'P', unit = 'mm', format = 'A4'):
self.offsets = {} # array of object offsets
self.page = 0 # current page number
self.n = 2 # current object number
self.buffer = '' # buffer holding in-memory PDF
self.buffer = bytearray() # buffer holding in-memory PDF
self.pages = {} # array containing pages and metadata
self.state = 0 # current document state
self.fonts = {} # array of used fonts
Expand Down Expand Up @@ -498,7 +499,6 @@ def add_font(self, family, style='', fname='', uni=False):
ttffilename = os.path.join(SYSTEM_TTFONTS, fname)
else:
raise RuntimeError("TTF Font file not found: %s" % fname)
name = ''
if FPDF_CACHE_MODE == 0:
unifilename = os.path.splitext(ttffilename)[0] + '.pkl'
elif FPDF_CACHE_MODE == 2:
Expand All @@ -523,7 +523,7 @@ def add_font(self, family, style='', fname='', uni=False):
'ItalicAngle': int(ttf.italicAngle),
'StemV': int(round(ttf.stemV, 0)),
'MissingWidth': int(round(ttf.defaultWidth, 0)),
}
}
# Generate metrics .pkl file
font_dict = {
'name': re.sub('[ ()]', '', ttf.fullName),
Expand Down Expand Up @@ -678,16 +678,16 @@ def text(self, x, y, txt=''):
"Output a string"
txt = self.normalize_text(txt)
if (self.unifontsubset):
txt2 = self._escape(UTF8ToUTF16BE(txt, False))
txt2 = UTF8ToUTF16BE(self._escape(txt), False)
for uni in UTF8StringToArray(txt):
self.current_font['subset'].append(uni)
else:
txt2 = self._escape(txt)
s=sprintf('BT %.2f %.2f Td (%s) Tj ET',x*self.k,(self.h-y)*self.k, txt2)
s=sprintf(b'BT %.2f %.2f Td (%s) Tj ET',x*self.k,(self.h-y)*self.k, txt2)
if(self.underline and txt!=''):
s+=' '+self._dounderline(x,y,txt)
s+=b' '+self._dounderline(x,y,txt)
if(self.color_flag):
s='q '+self.text_color+' '+s+' Q'
s=b'q '+self.text_color.encode()+b' '+s+b' Q'
self._out(s)

@check_page
Expand Down Expand Up @@ -1115,22 +1115,17 @@ def output(self, name='',dest=''):
dest='I'
else:
dest='F'
if PY3K:
# manage binary data as latin1 until PEP461 or similar is implemented
buffer = self.buffer.encode("latin1")
else:
buffer = self.buffer
if dest in ('I', 'D'):
# Python < 3 writes byte data transparently without "buffer"
stdout = getattr(sys.stdout, 'buffer', sys.stdout)
stdout.write(buffer)
stdout.write(self.buffer)
elif dest=='F':
#Save to local file
with open(name,'wb') as f:
f.write(buffer)
f.write(self.buffer)
elif dest=='S':
#Return as a byte string
return buffer
return self.buffer
else:
self.error('Incorrect output destination: '+dest)

Expand Down Expand Up @@ -1172,8 +1167,8 @@ def _putpages(self):
# Now repeat for no pages in non-subset fonts
for n in range(1,nb + 1):
self.pages[n]["content"] = \
self.pages[n]["content"].replace(self.str_alias_nb_pages,
str(nb))
self.pages[n]["content"].replace(self.str_alias_nb_pages.encode(),
str(nb).encode())
if self.def_orientation == 'P':
dw_pt = self.dw_pt
dh_pt = self.dh_pt
Expand Down Expand Up @@ -1206,6 +1201,7 @@ def _putpages(self):
annots += '/A <</S /URI /URI ' + \
self._textstring(pl[4]) + '>>>>'
else:
assert pl[4] in self.links, f'Page {n} has a link with an invalid index: {pl[4]} (doc #links={len(self.links)})'
l = self.links[pl[4]]
if l[0] in self.orientation_changes:
h = w_pt
Expand All @@ -1222,9 +1218,7 @@ def _putpages(self):
# Page content
content = self.pages[n]["content"]
if self.compress:
# manage binary data as latin1 until PEP461 or similar is implemented
p = content.encode("latin1") if PY3K else content
p = zlib.compress(p)
p = zlib.compress(content)
else:
p = content
self._newobj()
Expand Down Expand Up @@ -1633,15 +1627,18 @@ def _putresourcedict(self):
self._out('>>')

def _putresources(self):
self._putfonts()
self._putimages()
with self._trace_size('resources.fonts'):
self._putfonts()
with self._trace_size('resources.images'):
self._putimages()
#Resource dictionary
self.offsets[2]=len(self.buffer)
self._out('2 0 obj')
self._out('<<')
self._putresourcedict()
self._out('>>')
self._out('endobj')
with self._trace_size('resources.dict'):
self.offsets[2]=len(self.buffer)
self._out('2 0 obj')
self._out('<<')
self._putresourcedict()
self._out('>>')
self._out('endobj')

def _putinfo(self):
self._out('/Producer '+self._textstring('PyFPDF '+FPDF_VERSION+' http://pyfpdf.googlecode.com/'))
Expand Down Expand Up @@ -1684,41 +1681,48 @@ def _puttrailer(self):
self._out('/Info '+str(self.n-1)+' 0 R')

def _enddoc(self):
self._putheader()
self._putpages()
self._putresources()
LOGGER.debug('Final doc sections size summary:')
with self._trace_size('header'):
self._putheader()
with self._trace_size('pages'):
self._putpages()
self._putresources() # trace_size is performed inside
#Info
self._newobj()
self._out('<<')
self._putinfo()
self._out('>>')
self._out('endobj')
with self._trace_size('info'):
self._newobj()
self._out('<<')
self._putinfo()
self._out('>>')
self._out('endobj')
#Catalog
self._newobj()
self._out('<<')
self._putcatalog()
self._out('>>')
self._out('endobj')
with self._trace_size('catalog'):
self._newobj()
self._out('<<')
self._putcatalog()
self._out('>>')
self._out('endobj')
#Cross-ref
o=len(self.buffer)
self._out('xref')
self._out('0 '+(str(self.n+1)))
self._out('0000000000 65535 f ')
for i in range(1,self.n+1):
self._out(sprintf('%010d 00000 n ',self.offsets[i]))
with self._trace_size('xref'):
o=len(self.buffer)
self._out('xref')
self._out('0 '+(str(self.n+1)))
self._out('0000000000 65535 f ')
for i in range(1,self.n+1):
self._out(sprintf('%010d 00000 n ',self.offsets[i]))
#Trailer
self._out('trailer')
self._out('<<')
self._puttrailer()
self._out('>>')
self._out('startxref')
self._out(o)
with self._trace_size('trailer'):
self._out('trailer')
self._out('<<')
self._puttrailer()
self._out('>>')
self._out('startxref')
self._out(o)
self._out('%%EOF')
self.state=3

def _beginpage(self, orientation, format, same):
self.page += 1
self.pages[self.page] = {"content": ""}
self.pages[self.page] = {"content": bytearray()}
self.state = 2
self.x = self.l_margin
self.y = self.t_margin
Expand Down Expand Up @@ -1984,9 +1988,9 @@ def _out(self, s):
elif not isinstance(s, basestring):
s = str(s)
if(self.state == 2):
self.pages[self.page]["content"] += (s + "\n")
self.pages[self.page]["content"] += (s.encode("latin1") + b"\n")
else:
self.buffer += (s + "\n")
self.buffer += (s.encode("latin1") + b"\n")

@check_page
def interleaved2of5(self, txt, x, y, w=1.0, h=10.0):
Expand Down Expand Up @@ -2067,3 +2071,18 @@ def code39(self, txt, x, y, w=1.5, h=5.0):
self.rect(x, y, dim[d], h, 'F')
x += dim[d]
x += dim['n']

@contextmanager
def _trace_size(self, label):
    "Log at DEBUG level how much self.buffer grew while the wrapped block ran"
    # Snapshot the buffer length before handing control to the `with` body.
    size_before = len(self.buffer)
    yield
    # Only reached when the `with` body completes without raising.
    grown_by = len(self.buffer) - size_before
    LOGGER.debug('- %s.size: %s', label, _sizeof_fmt(grown_by))


def _sizeof_fmt(num, suffix='B'):
# Recipe from: https://stackoverflow.com/a/1094933/636849
for unit in ['','Ki','Mi','Gi','Ti','Pi','Ei','Zi']:
if abs(num) < 1024.0:
return "%3.1f%s%s" % (num, unit, suffix)
num /= 1024.0
return "%.1f%s%s" % (num, 'Yi', suffix)
6 changes: 1 addition & 5 deletions fpdf/php.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,7 @@ def UTF8ToUTF16BE(instr, setbom=True):
outstr += "\xFE\xFF".encode("latin1")
if not isinstance(instr, unicode):
instr = instr.decode('UTF-8')
outstr += instr.encode('UTF-16BE')
# convert bytes back to fake unicode string until PEP461-like is implemented
if PY3K:
outstr = outstr.decode("latin1")
return outstr
return outstr + instr.encode('UTF-16BE')

def UTF8StringToArray(instr):
"Converts UTF-8 strings to codepoints array"
Expand Down