Skip to content

Commit ca709e2

Browse files
authored
Pno/fix tests create dataset (#1994)
1 parent 06e5429 commit ca709e2

File tree

7 files changed

+74
-27
lines changed

7 files changed

+74
-27
lines changed

libs/labelbox/tests/conftest.py

Lines changed: 36 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -501,14 +501,16 @@ def consensus_project_with_batch(
501501

502502
@pytest.fixture
503503
def dataset(client, rand_gen):
504-
dataset = client.create_dataset(name=rand_gen(str))
504+
# Handle invalid default IAM integrations in test environments gracefully
505+
dataset = create_dataset_robust(client, name=rand_gen(str))
505506
yield dataset
506507
dataset.delete()
507508

508509

509510
@pytest.fixture(scope="function")
510511
def unique_dataset(client, rand_gen):
511-
dataset = client.create_dataset(name=rand_gen(str))
512+
# Handle invalid default IAM integrations in test environments gracefully
513+
dataset = create_dataset_robust(client, name=rand_gen(str))
512514
yield dataset
513515
dataset.delete()
514516

@@ -857,15 +859,17 @@ def func(project):
857859

858860
@pytest.fixture
859861
def initial_dataset(client, rand_gen):
860-
dataset = client.create_dataset(name=rand_gen(str))
862+
# Handle invalid default IAM integrations in test environments gracefully
863+
dataset = create_dataset_robust(client, name=rand_gen(str))
861864
yield dataset
862865

863866
dataset.delete()
864867

865868

866869
@pytest.fixture
867870
def video_data(client, rand_gen, video_data_row, wait_for_data_row_processing):
868-
dataset = client.create_dataset(name=rand_gen(str))
871+
# Handle invalid default IAM integrations in test environments gracefully
872+
dataset = create_dataset_robust(client, name=rand_gen(str))
869873
data_row_ids = []
870874
data_row = dataset.create_data_row(video_data_row)
871875
data_row = wait_for_data_row_processing(client, data_row)
@@ -884,7 +888,8 @@ def create_video_data_row(rand_gen):
884888

885889
@pytest.fixture
886890
def video_data_100_rows(client, rand_gen, wait_for_data_row_processing):
887-
dataset = client.create_dataset(name=rand_gen(str))
891+
# Handle invalid default IAM integrations in test environments gracefully
892+
dataset = create_dataset_robust(client, name=rand_gen(str))
888893
data_row_ids = []
889894
for _ in range(100):
890895
data_row = dataset.create_data_row(create_video_data_row(rand_gen))
@@ -1276,3 +1281,29 @@ def module_teardown_helpers():
12761281
@pytest.fixture
12771282
def label_helpers():
12781283
return LabelHelpers()
1284+
1285+
1286+
def create_dataset_robust(client, **kwargs):
    """
    Create a dataset, tolerating an invalid *default* IAM integration.

    This is a helper function for tests that need to create datasets directly
    instead of using fixtures. The call is first forwarded unchanged to
    ``client.create_dataset``. If that fails because the integration is not
    valid and the caller did not choose an integration, the dataset is
    created again with ``iam_integration=None``.

    An ``iam_integration`` passed explicitly by the caller (including
    ``None``) is never overridden: the test asked for that specific
    configuration, so silently replacing it would make the test exercise
    something other than what it claims to.

    Args:
        client: Labelbox client instance
        **kwargs: Arguments to pass to create_dataset

    Returns:
        Dataset: Created dataset

    Raises:
        ValueError: Re-raised from ``client.create_dataset`` when the error
            is unrelated to an invalid integration, or when the caller
            supplied ``iam_integration`` explicitly.
    """
    try:
        return client.create_dataset(**kwargs)
    except ValueError as e:
        default_integration_invalid = (
            "iam_integration" not in kwargs
            and "Integration is not valid" in str(e)
        )
        if not default_integration_invalid:
            # Bare raise keeps the original traceback intact.
            raise

    # Retry outside the except block so that a failure here is reported on
    # its own rather than chained to the integration error handled above.
    return client.create_dataset(iam_integration=None, **kwargs)

libs/labelbox/tests/data/annotation_import/conftest.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from labelbox.schema.model_run import ModelRun
1212
from labelbox.schema.ontology import Ontology
1313
from labelbox.schema.project import Project
14+
from ...conftest import create_dataset_robust
1415

1516
"""
1617
The main fixtures of this library are configured_project and configured_project_by_global_key. Both fixtures generate data rows with a parametrized media type. They create a number of data rows equal to the DATA_ROW_COUNT variable below. The data rows are generated with a factory fixture that returns a function that allows you to pass a global key. The ontologies are generated in normalized form, based on the given MediaType (i.e. only features supported by the MediaType are created). This ontology is later used to obtain the correct annotations with the prediction_id_mapping and corresponding inferences. Each data row will have every annotation supported for its MediaType attached.
@@ -653,7 +654,7 @@ def _create_response_creation_project(
653654
) -> Tuple[Project, Ontology, Dataset]:
654655
"For response creation projects"
655656

656-
dataset = client.create_dataset(name=rand_gen(str))
657+
dataset = create_dataset_robust(client, name=rand_gen(str))
657658

658659
project = client.create_response_creation_project(
659660
name=f"{ontology_kind}-{rand_gen(str)}"
@@ -695,7 +696,7 @@ def _create_response_creation_project(
695696
def llm_prompt_response_creation_dataset_with_data_row(
696697
client: Client, rand_gen
697698
):
698-
dataset = client.create_dataset(name=rand_gen(str))
699+
dataset = create_dataset_robust(client, name=rand_gen(str))
699700
global_key = str(uuid.uuid4())
700701

701702
convo_data = {
@@ -752,7 +753,7 @@ def _create_prompt_response_project(
752753
def _create_offline_mmc_project(
753754
client: Client, rand_gen, data_row_json, normalized_ontology
754755
) -> Tuple[Project, Ontology, Dataset]:
755-
dataset = client.create_dataset(name=rand_gen(str))
756+
dataset = create_dataset_robust(client, name=rand_gen(str))
756757

757758
project = client.create_offline_model_evaluation_project(
758759
name=f"offline-mmc-{rand_gen(str)}",
@@ -797,7 +798,7 @@ def _create_project(
797798
) -> Tuple[Project, Ontology, Dataset]:
798799
"""Shared function to configure project for integration tests"""
799800

800-
dataset = client.create_dataset(name=rand_gen(str))
801+
dataset = create_dataset_robust(client, name=rand_gen(str))
801802

802803
project = client.create_project(
803804
name=f"{media_type}-{rand_gen(str)}", media_type=media_type

libs/labelbox/tests/data/annotation_import/test_relationships.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
)
2323
from labelbox.data.serialization.ndjson import NDJsonConverter
2424
import pytest
25+
from ...conftest import create_dataset_robust
2526

2627

2728
def validate_iso_format(date_string: str):
@@ -163,7 +164,7 @@ def configured_project(
163164

164165
dataset = None
165166

166-
dataset = client.create_dataset(name=rand_gen(str))
167+
dataset = create_dataset_robust(client, name=rand_gen(str))
167168

168169
project = client.create_project(
169170
name=f"{media_type}-{rand_gen(str)}", media_type=media_type

libs/labelbox/tests/integration/test_batch.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,14 @@
1010
)
1111

1212
from labelbox import Dataset, Project
13+
from ..conftest import create_dataset_robust
1314

1415

1516
def get_data_row_ids(ds: Dataset):
16-
return [dr.uid for dr in list(ds.data_rows())]
17+
export_task = ds.export()
18+
export_task.wait_till_done()
19+
stream = export_task.get_buffered_stream()
20+
return [dr.json["data_row"]["id"] for dr in stream]
1721

1822

1923
def test_create_batch(project: Project, big_dataset_data_row_ids: List[str]):
@@ -243,7 +247,7 @@ def test_list_all_batches(project: Project, client, image_url: str):
243247
datasets = []
244248

245249
for assets in data:
246-
dataset = client.create_dataset(name=str(uuid4()))
250+
dataset = create_dataset_robust(client, name=str(uuid4()))
247251
create_data_rows_task = dataset.create_data_rows(assets)
248252
create_data_rows_task.wait_till_done()
249253
datasets.append(dataset)

libs/labelbox/tests/integration/test_dataset.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,13 @@
88
from labelbox.schema.internal.descriptor_file_creator import (
99
DescriptorFileCreator,
1010
)
11+
from ..conftest import create_dataset_robust
1112

1213

1314
def test_dataset(client, rand_gen):
1415
# confirm dataset can be created
1516
name = rand_gen(str)
16-
dataset = client.create_dataset(name=name)
17+
dataset = create_dataset_robust(client, name=name)
1718
assert dataset.name == name
1819
assert dataset.created_by() == client.get_user()
1920
assert dataset.organization() == client.get_organization()
@@ -52,11 +53,14 @@ def test_dataset(client, rand_gen):
5253
def dataset_for_filtering(client, rand_gen):
5354
name_1 = rand_gen(str)
5455
name_2 = rand_gen(str)
55-
d1 = client.create_dataset(name=name_1)
56-
d2 = client.create_dataset(name=name_2)
56+
d1 = create_dataset_robust(client, name=name_1)
57+
d2 = create_dataset_robust(client, name=name_2)
5758

5859
yield name_1, d1, name_2, d2
5960

61+
d1.delete()
62+
d2.delete()
63+
6064

6165
def test_dataset_filtering(client, dataset_for_filtering):
6266
name_1, d1, name_2, d2 = dataset_for_filtering

libs/labelbox/tests/integration/test_delegated_access.py

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
GcpIamIntegrationSettings,
1212
AzureIamIntegrationSettings,
1313
)
14+
from ..conftest import create_dataset_robust
1415

1516

1617
def delete_iam_integration(client, iam_integration_id: str):
@@ -382,7 +383,7 @@ def test_default_integration():
382383
Org ID: cl269lvvj78b50zau34s4550z
383384
Email: jtso+gcp_sdk_tests@labelbox.com"""
384385
client = Client(api_key=os.environ.get("DA_GCP_LABELBOX_API_KEY"))
385-
ds = client.create_dataset(name="new_ds")
386+
ds = create_dataset_robust(client, name="new_ds")
386387
dr = ds.create_data_row(
387388
row_data="gs://jtso-gcs-sdk-da-tests/nikita-samokhin-D6QS6iv_CTY-unsplash.jpg"
388389
)
@@ -414,7 +415,9 @@ def test_non_default_integration():
414415
inte for inte in integrations if "aws-da-test-bucket" in inte.name
415416
][0]
416417
assert integration.valid
417-
ds = client.create_dataset(iam_integration=integration, name="new_ds")
418+
ds = create_dataset_robust(
419+
client, iam_integration=integration, name="new_ds"
420+
)
418421
assert ds.iam_integration().name == "aws-da-test-bucket"
419422
dr = ds.create_data_row(
420423
row_data="https://jtso-aws-da-sdk-tests.s3.us-east-2.amazonaws.com/adrian-yu-qkN4D3Rf1gw-unsplash.jpg"
@@ -424,7 +427,7 @@ def test_non_default_integration():
424427

425428

426429
def test_no_integration(client, image_url):
427-
ds = client.create_dataset(iam_integration=None, name="new_ds")
430+
ds = create_dataset_robust(client, iam_integration=None, name="new_ds")
428431
assert ds.iam_integration() is None
429432
dr = ds.create_data_row(row_data=image_url)
430433
assert requests.get(dr.row_data).status_code == 200
@@ -433,7 +436,7 @@ def test_no_integration(client, image_url):
433436

434437
@pytest.mark.skip(reason="Assumes state of account doesn't have integration")
435438
def test_no_default_integration(client):
436-
ds = client.create_dataset(name="new_ds")
439+
ds = create_dataset_robust(client, name="new_ds")
437440
assert ds.iam_integration() is None
438441
ds.delete()
439442

@@ -466,8 +469,8 @@ def test_add_integration_from_object():
466469
if "aws-da-test-bucket" in integration.name
467470
][0]
468471

469-
ds = client.create_dataset(
470-
iam_integration=None, name=f"integration_add_obj-{uuid.uuid4()}"
472+
ds = create_dataset_robust(
473+
client, iam_integration=None, name=f"integration_add_obj-{uuid.uuid4()}"
471474
)
472475

473476
# Test set integration with object
@@ -506,8 +509,8 @@ def test_add_integration_from_uid():
506509
if "aws-da-test-bucket" in integration.name
507510
][0]
508511

509-
ds = client.create_dataset(
510-
iam_integration=None, name=f"integration_add_id-{uuid.uuid4()}"
512+
ds = create_dataset_robust(
513+
client, iam_integration=None, name=f"integration_add_id-{uuid.uuid4()}"
511514
)
512515

513516
# Test set integration with integration id
@@ -552,8 +555,10 @@ def test_integration_remove():
552555
if "aws-da-test-bucket" in integration.name
553556
][0]
554557

555-
ds = client.create_dataset(
556-
iam_integration=integration, name=f"integration_remove-{uuid.uuid4()}"
558+
ds = create_dataset_robust(
559+
client,
560+
iam_integration=integration,
561+
name=f"integration_remove-{uuid.uuid4()}",
557562
)
558563

559564
# Test unset integration

libs/labelbox/tests/integration/test_pagination.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,14 @@
33
import pytest
44

55
from labelbox.schema.dataset import Dataset
6+
from ..conftest import create_dataset_robust
67

78

89
@pytest.fixture
910
def data_for_dataset_order_test(client, rand_gen):
1011
name = rand_gen(str)
11-
dataset1 = client.create_dataset(name=name)
12-
dataset2 = client.create_dataset(name=name)
12+
dataset1 = create_dataset_robust(client, name=name)
13+
dataset2 = create_dataset_robust(client, name=name)
1314

1415
yield name
1516

0 commit comments

Comments
 (0)