12
12
from tqdm import tqdm
13
13
14
14
15
class ExportType:
    """Namespace of string constants naming the supported export formats.

    The members are plain ``str`` values, so they compare equal to the
    corresponding literal (``ExportType.PDF == "pdf"``).
    """

    # Markdown export.
    MARKDOWN = "markdown"
    # HTML export.
    HTML = "html"
    # PDF export.
    PDF = "pdf"
19
19
20
20
21
class ViewExportType:
    """Namespace of string constants selecting which view(s) to export.

    The members are plain ``str`` values and can be compared directly with
    string literals.
    """

    # Export only the current view.
    CURRENT_VIEW = "currentView"
    # Export all views.
    ALL = "all"
24
24
@@ -37,6 +37,22 @@ def __init__(
37
37
recursive = True ,
38
38
workers = multiprocessing .cpu_count (),
39
39
):
40
+ """
41
+ Initializes the NotionExporter class.
42
+
43
+ Args:
44
+ token_v2 (str): The user's Notion V2 token.
45
+ file_token (str): The user's file token for Notion.
46
+ pages (dict): Dictionary of pages to be exported.
47
+ export_directory (str, optional): Directory where exports will be saved. Defaults to the current directory.
48
+ flatten_export_file_tree (bool, optional): If True, flattens the export file tree. Defaults to True.
49
+ export_type (ExportType, optional): Type of export (e.g., MARKDOWN, HTML, PDF). Defaults to MARKDOWN.
50
+ current_view_export_type (ViewExportType, optional): Type of view export (e.g., CURRENT_VIEW, ALL). Defaults to CURRENT_VIEW.
51
+ include_files (bool, optional): If True, includes files in the export. Defaults to False.
52
+ recursive (bool, optional): If True, exports will be recursive. Defaults to True.
53
+ workers (int, optional): Number of worker threads for exporting. Defaults to the number of CPUs available.
54
+ """
55
+
40
56
self .export_name = f"export-{ datetime .now ().strftime ('%Y-%m-%d-%H-%M-%S' )} "
41
57
self .token_v2 = token_v2
42
58
self .file_token = file_token
@@ -59,11 +75,30 @@ def __init__(
59
75
os .makedirs (f"{ self .export_directory } { self .export_name } " , exist_ok = True )
60
76
61
77
def _to_uuid_format (self , s ):
78
+ """
79
+ Converts a string to UUID format.
80
+
81
+ Args:
82
+ s (str): The input string.
83
+
84
+ Returns:
85
+ str: The string in UUID format.
86
+ """
62
87
if "-" == s [8 ] and "-" == s [13 ] and "-" == s [18 ] and "-" == s [23 ]:
63
88
return s
64
89
return f"{ s [:8 ]} -{ s [8 :12 ]} -{ s [12 :16 ]} -{ s [16 :20 ]} -{ s [20 :]} "
65
90
66
91
def _get_format_options (self , export_type : ExportType , include_files = False ):
92
+ """
93
+ Retrieves format options based on the export type and whether to include files.
94
+
95
+ Args:
96
+ export_type (ExportType): Type of export (e.g., MARKDOWN, HTML, PDF).
97
+ include_files (bool, optional): If True, includes files in the export. Defaults to False.
98
+
99
+ Returns:
100
+ dict: A dictionary containing format options.
101
+ """
67
102
format_options = {}
68
103
if export_type == ExportType .PDF :
69
104
format_options ["pdfFormat" ] = "Letter"
@@ -74,6 +109,15 @@ def _get_format_options(self, export_type: ExportType, include_files=False):
74
109
return format_options
75
110
76
111
def _export (self , id ):
112
+ """
113
+ Initiates the export of a Notion page.
114
+
115
+ Args:
116
+ id (str): The ID of the Notion page.
117
+
118
+ Returns:
119
+ str: The task ID of the initiated export.
120
+ """
77
121
url = "https://www.notion.so/api/v3/enqueueTask"
78
122
id = self ._to_uuid_format (s = id )
79
123
export_options = {
@@ -112,6 +156,15 @@ def _export(self, id):
112
156
return response ["taskId" ]
113
157
114
158
def _get_status (self , task_id ):
159
+ """
160
+ Fetches the status of an export task.
161
+
162
+ Args:
163
+ task_id (str): The ID of the export task.
164
+
165
+ Returns:
166
+ dict: A dictionary containing details about the task status.
167
+ """
115
168
url = "https://www.notion.so/api/v3/getTasks"
116
169
117
170
payload = json .dumps ({"taskIds" : [task_id ]})
@@ -122,6 +175,12 @@ def _get_status(self, task_id):
122
175
return response [0 ]
123
176
124
177
def _download (self , url ):
178
+ """
179
+ Downloads an exported file from a given URL.
180
+
181
+ Args:
182
+ url (str): The URL of the exported file.
183
+ """
125
184
response = requests .request ("GET" , url , headers = self .download_headers )
126
185
file_name = url .split ("/" )[- 1 ][100 :]
127
186
with open (
@@ -131,6 +190,15 @@ def _download(self, url):
131
190
f .write (response .content )
132
191
133
192
def _process_page (self , page_details ):
193
+ """
194
+ Processes an individual Notion page for export.
195
+
196
+ Args:
197
+ page_details (tuple): Tuple containing the name and ID of the Notion page.
198
+
199
+ Returns:
200
+ dict: Details about the export status and any errors.
201
+ """
134
202
name , id = page_details
135
203
task_id = self ._export (id )
136
204
@@ -155,10 +223,17 @@ def _process_page(self, page_details):
155
223
}
156
224
157
225
def _wait_for_export_completion (self , task_id ):
158
- """Helper method to wait until the export is complete or failed."""
226
+ """
227
+ Waits until a given export task completes or fails.
228
+
229
+ Args:
230
+ task_id (str): The ID of the export task.
231
+
232
+ Returns:
233
+ tuple: A tuple containing the status, state, error, and number of pages exported.
234
+ """
159
235
while True :
160
236
status = self ._get_status (task_id )
161
- # print(status)
162
237
163
238
if not status :
164
239
time .sleep (1 )
@@ -175,6 +250,9 @@ def _wait_for_export_completion(self, task_id):
175
250
time .sleep (1 )
176
251
177
252
def _unpack (self ):
253
+ """
254
+ Unpacks and saves exported content from zip archives.
255
+ """
178
256
directory_path = f"{ self .export_directory } { self .export_name } "
179
257
for file in os .listdir (directory_path ):
180
258
if file .endswith (".zip" ):
@@ -183,11 +261,17 @@ def _unpack(self):
183
261
os .remove (full_file_path )
184
262
185
263
def process (self ):
264
+ """
265
+ Processes and exports all provided Notion pages.
266
+ """
186
267
logging .info (f"Exporting { len (self .pages )} pages..." )
187
268
188
269
with ThreadPoolExecutor (max_workers = self .workers ) as executor :
189
270
with tqdm (total = len (self .pages ), dynamic_ncols = True ) as pbar :
190
- futures = {executor .submit (self ._process_page , item ): item for item in self .pages .items ()}
271
+ futures = {
272
+ executor .submit (self ._process_page , item ): item
273
+ for item in self .pages .items ()
274
+ }
191
275
for future in concurrent .futures .as_completed (futures ):
192
276
result = future .result ()
193
277
if result ["state" ] == "failure" :
0 commit comments