-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
6865523
commit 9a6e33b
Showing
14 changed files
with
312 additions
and
136 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,15 @@ | ||
from depdf.api import convert_pdf_to_html, convert_pdf_to_html_by_page | ||
from depdf.api import * | ||
from depdf.config import Config | ||
from depdf.pdf import DePDF | ||
from depdf.page import DePage | ||
from depdf.version import __version__ | ||
|
||
__all__ = [ | ||
'convert_pdf_to_html', | ||
'convert_pdf_to_html_by_page', | ||
'Config', | ||
'DePDF', | ||
'DePage', | ||
'convert_pdf_to_html', | ||
'convert_page_to_html', | ||
'extract_page_tables', | ||
'extract_page_paragraphs', | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,31 +1,36 @@ | ||
from depdf.base import Base, Box | ||
from depdf.base import Box, InnerWrapper | ||
from depdf.config import check_config | ||
from depdf.log import logger_init | ||
from depdf.utils import calc_bbox, construct_style | ||
|
||
log = logger_init(__name__) | ||
|
||
|
||
class Paragraph(Base, Box): | ||
class Paragraph(InnerWrapper, Box): | ||
object_type = 'paragraph' | ||
|
||
@check_config | ||
def __init__(self, bbox=None, text='', pid=1, para_idx=1, config=None, inner_object=None): | ||
self.bbox = bbox | ||
def __init__(self, bbox=None, text='', pid=1, para_idx=1, config=None, inner_objects=None, style=None): | ||
para_id = 'page-{pid}-paragraph-{para_id}'.format(pid=pid, para_id=para_idx) | ||
para_class = '{para_class} page-{pid}'.format(para_class=getattr(config, 'paragraph_class'), pid=pid) | ||
html = '<p id="{para_id}" class="{para_class}">'.format( | ||
para_id=para_id, para_class=para_class | ||
style = construct_style(style=style) | ||
html = '<p id="{para_id}" class="{para_class}"{style}>'.format( | ||
para_id=para_id, para_class=para_class, style=style | ||
) | ||
self.pid = pid | ||
self.para_id = para_idx | ||
self.bbox = bbox | ||
if text: | ||
self.text = text | ||
html += str(text) | ||
else: | ||
self._inner_object = [inner_object] | ||
for obj in inner_object: | ||
self.html += getattr(obj, 'html', '') | ||
if bbox is None: | ||
self.bbox = calc_bbox(inner_objects) | ||
self._inner_objects = inner_objects | ||
for obj in inner_objects: | ||
html += getattr(obj, 'html', '') | ||
html += '</p>' | ||
self.html = html | ||
|
||
@property | ||
def inner_object(self): | ||
return [obj.to_dict if hasattr(obj, 'to_dict') else obj for obj in self._inner_object] | ||
def __repr__(self): | ||
return '<depdf.Paragraph: ({}, {})>'.format(self.pid, self.para_id) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,10 @@ | ||
from depdf.base import Base | ||
from depdf.base import Base, Box | ||
|
||
|
||
class Text(Base): | ||
class Text(Base, Box): | ||
object_type = 'text' | ||
|
||
def __init__(self, text): | ||
def __init__(self, bbox='', text=''): | ||
self.bbox = bbox | ||
self.text = text | ||
self.html = text |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.