Skip to content

Commit 38d1c24

Browse files
Merge pull request #49 from JigsawStack/feat/obj_detection
added object detection
2 parents d362b9c + 1fc1a4f commit 38d1c24

File tree

10 files changed

+186
-75
lines changed

10 files changed

+186
-75
lines changed

jigsawstack/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -119,6 +119,7 @@ def __init__(
119119
).image_generation
120120

121121

122+
122123
class AsyncJigsawStack:
123124
validate: AsyncValidate
124125
web: AsyncWeb
@@ -229,5 +230,6 @@ def __init__(
229230
).image_generation
230231

231232

233+
232234
# Create a global instance of the Web class
233235
__all__ = ["JigsawStack", "Search", "JigsawStackError", "AsyncJigsawStack"]

jigsawstack/_client.py

Lines changed: 0 additions & 62 deletions
This file was deleted.

jigsawstack/audio.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
from .async_request import AsyncRequest, AsyncRequestConfig
55
from ._config import ClientConfig
66
from typing import Any, Dict, List, cast
7-
from typing_extensions import NotRequired, TypedDict
7+
from typing_extensions import NotRequired, TypedDict, Literal
88
from .custom_typing import SupportedAccents
99
from .helpers import build_path
1010

@@ -14,6 +14,7 @@ class TextToSpeechParams(TypedDict):
1414
accent: NotRequired[SupportedAccents]
1515
speaker_clone_url: NotRequired[str]
1616
speaker_clone_file_store_key: NotRequired[str]
17+
return_type: NotRequired[Literal["url", "binary", "base64"]]
1718

1819

1920
class TTSCloneParams(TypedDict):

jigsawstack/image_generation.py

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,8 @@ class ImageGenerationParams(TypedDict):
5353
File store key to use as image input.
5454
"""
5555

56+
return_type: NotRequired[Literal["url", "binary", "base64"]]
57+
5658
class ImageGenerationResponse(TypedDict):
5759
success: bool
5860
"""

jigsawstack/search.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -109,7 +109,7 @@ class SearchParams(TypedDict):
109109
Two-letter country code to localize search results (e.g. 'US', 'GB')
110110
"""
111111

112-
auto_scrape: bool
112+
auto_scrape: NotRequired[bool]
113113
"""
114114
Whether to automatically scrape content from search result URLs
115115
"""

jigsawstack/translate.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
from typing import Any, Dict, List, Union, cast, overload
2-
from typing_extensions import NotRequired, TypedDict
2+
from typing_extensions import NotRequired, TypedDict, Literal
33
from .request import Request, RequestConfig
44
from .async_request import AsyncRequest
55
from typing import List, Union
@@ -20,6 +20,8 @@ class TranslateImageParams(TypedDict):
2020
The file store key of the image to translate.
2121
"""
2222

23+
return_type: NotRequired[Literal["url", "binary", "base64"]]
24+
2325
class TranslateParams(TypedDict):
2426
target_language: str
2527
"""

jigsawstack/vision.py

Lines changed: 119 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,128 @@
11
from typing import Any, Dict, List, Union, cast, Optional
2-
from typing_extensions import NotRequired, TypedDict
2+
from typing_extensions import NotRequired, TypedDict, Literal
33
from typing import Any, Dict, List, cast
4-
from typing_extensions import NotRequired, TypedDict
4+
from typing_extensions import NotRequired, TypedDict, Literal
55
from .request import Request, RequestConfig
66
from .async_request import AsyncRequest, AsyncRequestConfig
77
from ._config import ClientConfig
88

99

10-
class OCRParams(TypedDict):
10+
class Point(TypedDict):
11+
x: int
12+
"""
13+
X coordinate of the point
14+
"""
15+
16+
y: int
17+
"""
18+
Y coordinate of the point
19+
"""
20+
21+
22+
class BoundingBox(TypedDict):
23+
top_left: Point
24+
"""
25+
Top-left corner of the bounding box
26+
"""
27+
28+
top_right: Point
29+
"""
30+
Top-right corner of the bounding box
31+
"""
32+
33+
bottom_left: Point
34+
"""
35+
Bottom-left corner of the bounding box
36+
"""
37+
38+
bottom_right: Point
39+
"""
40+
Bottom-right corner of the bounding box
41+
"""
42+
43+
width: int
44+
"""
45+
Width of the bounding box
46+
"""
47+
48+
height: int
49+
"""
50+
Height of the bounding box
51+
"""
52+
53+
54+
class GuiElement(TypedDict):
55+
bounds: BoundingBox
56+
"""
57+
Bounding box coordinates of the GUI element
58+
"""
59+
60+
content: Union[str, None]
61+
"""
62+
Content of the GUI element, can be null if no object detected
63+
"""
64+
65+
66+
class DetectedObject(TypedDict):
67+
bounds: BoundingBox
68+
"""
69+
Bounding box coordinates of the detected object
70+
"""
71+
72+
mask: NotRequired[str]
73+
"""
74+
URL or base64 string depending on return_type - only present for some objects
75+
"""
76+
77+
78+
79+
class ObjectDetectionParams(TypedDict):
1180
url: NotRequired[str]
81+
"""
82+
URL of the image to process
83+
"""
84+
1285
file_store_key: NotRequired[str]
86+
"""
87+
File store key of the image to process
88+
"""
89+
90+
prompts: NotRequired[List[str]]
91+
"""
92+
List of prompts for object detection
93+
"""
94+
95+
features: NotRequired[List[Literal["object_detection", "gui"]]]
96+
"""
97+
List of features to enable: object_detection, gui
98+
"""
99+
100+
annotated_image: NotRequired[bool]
101+
"""
102+
Whether to return an annotated image
103+
"""
104+
105+
return_type: NotRequired[Literal["url", "base64"]]
106+
"""
107+
Format for returned images: url or base64
108+
"""
109+
110+
111+
class ObjectDetectionResponse(TypedDict):
112+
annotated_image: NotRequired[str]
113+
"""
114+
URL or base64 string of annotated image (included only if annotated_image=true and objects/gui_elements exist)
115+
"""
116+
117+
gui_elements: NotRequired[List[GuiElement]]
118+
"""
119+
List of detected GUI elements (included only if features includes "gui")
120+
"""
121+
122+
objects: NotRequired[List[DetectedObject]]
123+
"""
124+
List of detected objects (included only if features includes "object_detection")
125+
"""
13126

14127

15128
class VOCRParams(TypedDict):
@@ -60,7 +173,7 @@ def vocr(self, params: VOCRParams) -> OCRResponse:
60173
).perform_with_content()
61174
return resp
62175

63-
def object_detection(self, params: OCRParams) -> OCRResponse:
176+
def object_detection(self, params: ObjectDetectionParams) -> ObjectDetectionResponse:
64177
path = "/ai/object_detection"
65178
resp = Request(
66179
config=self.config,
@@ -97,9 +210,9 @@ async def vocr(self, params: VOCRParams) -> OCRResponse:
97210
).perform_with_content()
98211
return resp
99212

100-
async def object_detection(self, params: OCRParams) -> OCRResponse:
213+
async def object_detection(self, params: ObjectDetectionParams) -> ObjectDetectionResponse:
101214
path = "/ai/object_detection"
102-
resp = AsyncRequest(
215+
resp = await AsyncRequest(
103216
config=self.config,
104217
path=path,
105218
params=cast(Dict[Any, Any], params),

jigsawstack/web.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -42,9 +42,9 @@ class DNSResponse(TypedDict):
4242
# HTML to Any
4343
#
4444
class HTMLToAnyParams(TypedDict):
45-
html: str
46-
url: str
47-
goto_options: NotRequired[object]
45+
html: NotRequired[str]
46+
url: NotRequired[str]
47+
goto_options: NotRequired[Dict[str, Union[int, str]]]
4848
scale: NotRequired[int]
4949
full_page: NotRequired[bool]
5050
omit_background: NotRequired[bool]
@@ -59,6 +59,7 @@ class HTMLToAnyParams(TypedDict):
5959
is_mobile: NotRequired[bool]
6060
dark_mode: NotRequired[bool]
6161
use_graphic_renderer: NotRequired[bool]
62+
return_type: NotRequired[Literal["url", "binary", "base64"]]
6263

6364

6465
class HTMLToAnyResponse(TypedDict):

tests/test_object_detection.py

Lines changed: 36 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,36 @@
1+
from unittest.mock import MagicMock
2+
import unittest
3+
from jigsawstack.exceptions import JigsawStackError
4+
import jigsawstack
5+
import pytest
6+
import asyncio
7+
import logging
8+
9+
logging.basicConfig(level=logging.INFO)
10+
logger = logging.getLogger(__name__)
11+
12+
jigsaw = jigsawstack.JigsawStack()
13+
async_jigsaw = jigsawstack.AsyncJigsawStack()
14+
15+
16+
def test_object_detection_response():
17+
try:
18+
result = jigsaw.vision.object_detection({"url": "https://rogilvkqloanxtvjfrkm.supabase.co/storage/v1/object/public/demo/Collabo%201080x842.jpg"})
19+
print(result)
20+
assert result["success"] == True
21+
except JigsawStackError as e:
22+
pytest.fail(f"Unexpected JigsawStackError: {e}")
23+
24+
25+
def test_object_detection_response_async():
26+
async def _test():
27+
client = jigsawstack.AsyncJigsawStack()
28+
try:
29+
result = await client.vision.object_detection({"url": "https://rogilvkqloanxtvjfrkm.supabase.co/storage/v1/object/public/demo/Collabo%201080x842.jpg"})
30+
print(result)
31+
assert result["success"] == True
32+
except JigsawStackError as e:
33+
pytest.fail(f"Unexpected JigsawStackError: {e}")
34+
35+
asyncio.run(_test())
36+

tests/test_search.py

Lines changed: 17 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,22 @@
1414

1515

1616
def test_search_suggestion_response():
17+
try:
18+
result = jigsaw.web.search({"query": "Where is San Francisco"})
19+
assert result["success"] == True
20+
except JigsawStackError as e:
21+
pytest.fail(f"Unexpected JigsawStackError: {e}")
22+
23+
24+
def test_ai_search_response():
25+
try:
26+
result = jigsaw.web.search({"query": "Where is San Francisco"})
27+
assert result["success"] == True
28+
except JigsawStackError as e:
29+
pytest.fail(f"Unexpected JigsawStackError: {e}")
30+
31+
32+
def test_search_suggestion_response_async():
1733
async def _test():
1834
client = jigsawstack.AsyncJigsawStack()
1935
try:
@@ -25,7 +41,7 @@ async def _test():
2541
asyncio.run(_test())
2642

2743

28-
def test_ai_search_response():
44+
def test_ai_search_response_async():
2945
async def _test():
3046
client = jigsawstack.AsyncJigsawStack()
3147
try:

0 commit comments

Comments
 (0)