1
- from typing import Any , Dict , List , cast , Union
2
- from typing_extensions import NotRequired , TypedDict , Optional
1
+ from typing import Any , Dict , List , Union , Optional , cast , Literal
2
+ from typing_extensions import NotRequired , TypedDict
3
+
3
4
from .request import Request , RequestConfig
4
5
from .async_request import AsyncRequest , AsyncRequestConfig
5
6
from ._config import ClientConfig
14
15
from .helpers import build_path
15
16
16
17
18
+ #
19
+ # DNS
20
+ #
17
21
class DNSParams (TypedDict ):
18
22
domain : str
19
23
type : NotRequired [str ]
@@ -34,6 +38,9 @@ class DNSResponse(TypedDict):
34
38
authority : List
35
39
36
40
41
+ #
42
+ # HTML to Any
43
+ #
37
44
class HTMLToAnyParams (TypedDict ):
38
45
html : str
39
46
url : str
@@ -58,56 +65,117 @@ class HTMLToAnyResponse(TypedDict):
58
65
html : str
59
66
60
67
68
+ #
69
+ # AI Scrape (request/response types, incl. BYO Proxy)
70
+ #
71
+ class CookieParameter (TypedDict ):
72
+ name : str
73
+ value : str
74
+ url : NotRequired [str ]
75
+ domain : NotRequired [str ]
76
+ path : NotRequired [str ]
77
+ secure : NotRequired [bool ]
78
+ httpOnly : NotRequired [bool ]
79
+ sameSite : NotRequired [Literal ["Strict" , "Lax" , "None" ]]
80
+ expires : NotRequired [bool ]
81
+ priority : NotRequired [str ]
82
+ sameParty : NotRequired [bool ]
83
+
84
+
85
class GotoOptions(TypedDict):
    """Navigation options carried by the ``goto_options`` scrape parameter.

    Both keys are required whenever this dict is supplied.
    """

    # NOTE(review): unit is not stated here (presumably milliseconds) -- confirm.
    timeout: int
    # NOTE(review): the set of accepted values is not constrained by this type.
    wait_until: str
88
+
89
+
90
class WaitFor(TypedDict):
    """Wait condition carried by the ``wait_for`` scrape parameter.

    ``mode`` names the kind of wait and ``value`` is its argument, which may
    be either a string or an integer.
    """

    mode: str
    value: Union[str, int]
93
+
94
+
95
class AdvanceConfig(TypedDict):
    """Boolean switches carried by the ``advance_config`` scrape parameter.

    All three flags are required whenever this dict is supplied.
    """

    console: bool
    network: bool
    cookies: bool
99
+
100
+
61
101
class BYOProxyAuth(TypedDict):
    """Credentials for a caller-supplied proxy (the ``auth`` key of ``BYOProxy``)."""

    username: str
    password: str
64
104
65
105
66
106
class BYOProxy(TypedDict):
    """Caller-supplied proxy description (the ``byo_proxy`` scrape parameter).

    ``server`` is required; ``auth`` may be omitted.
    """

    server: str
    auth: NotRequired[BYOProxyAuth]
109
+
69
110
70
111
class BaseAIScrapeParams(TypedDict):
    """Fields shared by both AI-scrape parameter variants.

    ``url`` is the only required key; everything else is optional.
    """

    url: str
    # NOTE(review): confirm this key's exact spelling (singular vs. plural)
    # against the API reference.
    root_element_selectors: NotRequired[str]
    page_position: NotRequired[int]
    http_headers: NotRequired[Dict[str, Any]]
    reject_request_pattern: NotRequired[List[str]]
    goto_options: NotRequired[GotoOptions]
    wait_for: NotRequired[WaitFor]
    advance_config: NotRequired[AdvanceConfig]
    size_preset: NotRequired[str]
    is_mobile: NotRequired[bool]
    scale: NotRequired[int]
    width: NotRequired[int]
    height: NotRequired[int]
    cookies: NotRequired[List[CookieParameter]]
    force_rotate_proxy: NotRequired[bool]
    byo_proxy: NotRequired[BYOProxy]
128
+
129
+
130
class AIScrapeParamsWithSelector(BaseAIScrapeParams):
    """Scrape parameters where ``selectors`` is required and ``element_prompts`` is optional."""

    selectors: List[str]
    element_prompts: NotRequired[List[str]]
133
+
84
134
85
135
class AIScrapeParamsWithPrompts(BaseAIScrapeParams):
    """Scrape parameters where ``element_prompts`` is required and ``selectors`` is optional."""

    selectors: NotRequired[List[str]]
    element_prompts: List[str]
88
138
89
- class AIScrapeParamsWithSelector (BaseAIScrapeParams ):
90
- selector : List [str ]
91
- element_prompts : Optional [List [str ]]
92
139
93
140
# Either parameter shape is accepted by ai_scrape: the variant that requires
# ``selectors`` or the variant that requires ``element_prompts``.
AIScrapeParams = Union[AIScrapeParamsWithSelector, AIScrapeParamsWithPrompts]
94
141
95
- class LinkData (TypedDict ):
96
- type : str # "a" or "img"
97
- href : Optional [str ]
142
+
143
class Attribute(TypedDict):
    """A name/value pair listed under ``Result.attributes``."""

    name: str
    value: str
146
+
147
+
148
class Result(TypedDict):
    """One entry of ``DataItem.results``: its HTML, its text and its attributes."""

    html: str
    text: str
    attributes: List[Attribute]
152
+
153
+
154
class DataItem(TypedDict):
    """One entry of ``AIScrapeResponse.data``: a key, its selector string and the results."""

    key: str
    selectors: str
    results: List[Result]
158
+
159
+
160
class Link(TypedDict):
    """One entry of ``AIScrapeResponse.link``.

    All three keys are always present; ``text`` may be ``None``.
    """

    href: str
    text: Optional[str]
    # Kind of element the link came from: "a" or "img".
    type: Literal["a", "img"]
99
164
100
165
101
166
class AIScrapeResponse(TypedDict):
    """Response shape annotated as the return type of ``ai_scrape``.

    Every key is required.
    """

    success: bool
    data: List[DataItem]
    page_position: int
    page_position_length: int
    context: Dict[str, List[str]]
    selectors: Dict[str, List[str]]
    link: List[Link]
109
174
110
175
176
+ #
177
+ # Web Client
178
+ #
111
179
class Web (ClientConfig ):
112
180
113
181
config : RequestConfig
@@ -130,7 +198,7 @@ def ai_scrape(self, params: AIScrapeParams) -> AIScrapeResponse:
130
198
resp = Request (
131
199
config = self .config ,
132
200
path = path ,
133
- params = cast (AIScrapeParams , params ),
201
+ params = cast (Dict [ Any , Any ] , params ),
134
202
verb = "post" ,
135
203
).perform_with_content ()
136
204
return resp
@@ -177,6 +245,9 @@ def search_suggestions(
177
245
return s .suggestions (params )
178
246
179
247
248
+ #
249
+ # Async Web Client
250
+ #
180
251
class AsyncWeb (ClientConfig ):
181
252
182
253
config : AsyncRequestConfig
@@ -204,7 +275,6 @@ async def ai_scrape(self, params: AIScrapeParams) -> AIScrapeResponse:
204
275
).perform_with_content ()
205
276
return resp
206
277
207
-
208
278
async def html_to_any (self , params : HTMLToAnyParams ) -> Any :
209
279
path = "/web/html_to_any"
210
280
resp = await AsyncRequest (
0 commit comments