Skip to content

Commit 22f0277

Browse files
committed
feat: clean and doc
1 parent 90d428d commit 22f0277

File tree

3 files changed

+111
-10
lines changed

3 files changed

+111
-10
lines changed

LICENSE

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2023 Roméo PHILLIPS
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

pyproject.toml

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,10 @@ description = "Export and download Notion pages asynchronously."
99
authors = ["Romeo Phillips"]
1010

1111
[tool.poetry.dependencies]
12-
python = "^3.7" # or whichever version you're targeting
13-
certifi = "2023.7.22"
14-
charset-normalizer = "3.2.0"
15-
idna = "3.4"
12+
python = "^3.7"
1613
requests = "2.31.0"
1714
tqdm = "4.66.1"
1815
urllib3 = "2.0.4"
19-
wheel = "0.41.2"
2016

2117

2218
[build]

src/python_notion_exporter/main.py

Lines changed: 89 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,13 +12,13 @@
1212
from tqdm import tqdm
1313

1414

15-
class ExportType():
15+
class ExportType:
1616
MARKDOWN = "markdown"
1717
HTML = "html"
1818
PDF = "pdf"
1919

2020

21-
class ViewExportType():
21+
class ViewExportType:
2222
CURRENT_VIEW = "currentView"
2323
ALL = "all"
2424

@@ -37,6 +37,22 @@ def __init__(
3737
recursive=True,
3838
workers=multiprocessing.cpu_count(),
3939
):
40+
"""
41+
Initializes the NotionExporter class.
42+
43+
Args:
44+
token_v2 (str): The user's Notion V2 token.
45+
file_token (str): The user's file token for Notion.
46+
pages (dict): Dictionary mapping each page name to the ID of the Notion page to be exported.
47+
export_directory (str, optional): Directory where exports will be saved. Defaults to the current directory.
48+
flatten_export_file_tree (bool, optional): If True, flattens the export file tree. Defaults to True.
49+
export_type (ExportType, optional): Type of export (e.g., MARKDOWN, HTML, PDF). Defaults to MARKDOWN.
50+
current_view_export_type (ViewExportType, optional): Type of view export (e.g., CURRENT_VIEW, ALL). Defaults to CURRENT_VIEW.
51+
include_files (bool, optional): If True, includes files in the export. Defaults to False.
52+
recursive (bool, optional): If True, exports will be recursive. Defaults to True.
53+
workers (int, optional): Number of worker threads for exporting. Defaults to the number of CPUs available.
54+
"""
55+
4056
self.export_name = f"export-{datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}"
4157
self.token_v2 = token_v2
4258
self.file_token = file_token
@@ -59,11 +75,30 @@ def __init__(
5975
os.makedirs(f"{self.export_directory}{self.export_name}", exist_ok=True)
6076

6177
def _to_uuid_format(self, s):
78+
"""
79+
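Converts an ID string to dashed UUID format by inserting dashes after the 8th, 12th, 16th and 20th characters; a string that already has dashes at indexes 8, 13, 18 and 23 is returned unchanged.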
80+
81+
Args:
82+
s (str): The input string.
83+
84+
Returns:
85+
str: The string in UUID format.
86+
"""
6287
if "-" == s[8] and "-" == s[13] and "-" == s[18] and "-" == s[23]:
6388
return s
6489
return f"{s[:8]}-{s[8:12]}-{s[12:16]}-{s[16:20]}-{s[20:]}"
6590

6691
def _get_format_options(self, export_type: ExportType, include_files=False):
92+
"""
93+
Retrieves format options based on the export type and whether to include files.
94+
95+
Args:
96+
export_type (ExportType): Type of export (e.g., MARKDOWN, HTML, PDF).
97+
include_files (bool, optional): If True, includes files in the export. Defaults to False.
98+
99+
Returns:
100+
dict: A dictionary containing format options.
101+
"""
67102
format_options = {}
68103
if export_type == ExportType.PDF:
69104
format_options["pdfFormat"] = "Letter"
@@ -74,6 +109,15 @@ def _get_format_options(self, export_type: ExportType, include_files=False):
74109
return format_options
75110

76111
def _export(self, id):
112+
"""
113+
Initiates the export of a Notion page.
114+
115+
Args:
116+
id (str): The ID of the Notion page.
117+
118+
Returns:
119+
str: The task ID of the initiated export.
120+
"""
77121
url = "https://www.notion.so/api/v3/enqueueTask"
78122
id = self._to_uuid_format(s=id)
79123
export_options = {
@@ -112,6 +156,15 @@ def _export(self, id):
112156
return response["taskId"]
113157

114158
def _get_status(self, task_id):
159+
"""
160+
Fetches the status of an export task.
161+
162+
Args:
163+
task_id (str): The ID of the export task.
164+
165+
Returns:
166+
dict: A dictionary containing details about the task status.
167+
"""
115168
url = "https://www.notion.so/api/v3/getTasks"
116169

117170
payload = json.dumps({"taskIds": [task_id]})
@@ -122,6 +175,12 @@ def _get_status(self, task_id):
122175
return response[0]
123176

124177
def _download(self, url):
178+
"""
179+
Downloads an exported file from a given URL.
180+
181+
Args:
182+
url (str): The URL of the exported file.
183+
"""
125184
response = requests.request("GET", url, headers=self.download_headers)
126185
file_name = url.split("/")[-1][100:]
127186
with open(
@@ -131,6 +190,15 @@ def _download(self, url):
131190
f.write(response.content)
132191

133192
def _process_page(self, page_details):
193+
"""
194+
Processes an individual Notion page for export.
195+
196+
Args:
197+
page_details (tuple): Tuple containing the name and ID of the Notion page.
198+
199+
Returns:
200+
dict: Details about the export status and any errors.
201+
"""
134202
name, id = page_details
135203
task_id = self._export(id)
136204

@@ -155,10 +223,17 @@ def _process_page(self, page_details):
155223
}
156224

157225
def _wait_for_export_completion(self, task_id):
158-
"""Helper method to wait until the export is complete or failed."""
226+
"""
227+
Waits until a given export task completes or fails.
228+
229+
Args:
230+
task_id (str): The ID of the export task.
231+
232+
Returns:
233+
tuple: A tuple containing the status, state, error, and number of pages exported.
234+
"""
159235
while True:
160236
status = self._get_status(task_id)
161-
# print(status)
162237

163238
if not status:
164239
time.sleep(1)
@@ -175,6 +250,9 @@ def _wait_for_export_completion(self, task_id):
175250
time.sleep(1)
176251

177252
def _unpack(self):
253+
"""
254+
Unpacks the exported content of every .zip archive in the export directory, then deletes each archive.
255+
"""
178256
directory_path = f"{self.export_directory}{self.export_name}"
179257
for file in os.listdir(directory_path):
180258
if file.endswith(".zip"):
@@ -183,11 +261,17 @@ def _unpack(self):
183261
os.remove(full_file_path)
184262

185263
def process(self):
264+
"""
265+
Exports all provided Notion pages concurrently on a thread pool of `self.workers` workers, tracking progress with a tqdm progress bar.
266+
"""
186267
logging.info(f"Exporting {len(self.pages)} pages...")
187268

188269
with ThreadPoolExecutor(max_workers=self.workers) as executor:
189270
with tqdm(total=len(self.pages), dynamic_ncols=True) as pbar:
190-
futures = {executor.submit(self._process_page, item): item for item in self.pages.items()}
271+
futures = {
272+
executor.submit(self._process_page, item): item
273+
for item in self.pages.items()
274+
}
191275
for future in concurrent.futures.as_completed(futures):
192276
result = future.result()
193277
if result["state"] == "failure":

0 commit comments

Comments
 (0)