
Pno/fix tests create dataset #1994


Merged · 2 commits · Jul 8, 2025
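This PR adds a create_dataset_robust helper to libs/labelbox/tests/conftest.py that wraps client.create_dataset and, when dataset creation fails because the default IAM integration is invalid, retries with iam_integration=None. Fixtures and integration tests across the suite are switched to the helper; get_data_row_ids in test_batch.py is reworked to read data row IDs from an export stream, and the dataset_for_filtering fixture gains cleanup of the datasets it creates.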
41 changes: 36 additions & 5 deletions libs/labelbox/tests/conftest.py
@@ -501,14 +501,16 @@ def consensus_project_with_batch(

@pytest.fixture
def dataset(client, rand_gen):
dataset = client.create_dataset(name=rand_gen(str))
# Handle invalid default IAM integrations in test environments gracefully
dataset = create_dataset_robust(client, name=rand_gen(str))
yield dataset
dataset.delete()


@pytest.fixture(scope="function")
def unique_dataset(client, rand_gen):
dataset = client.create_dataset(name=rand_gen(str))
# Handle invalid default IAM integrations in test environments gracefully
dataset = create_dataset_robust(client, name=rand_gen(str))
yield dataset
dataset.delete()

@@ -857,15 +859,17 @@ def func(project):

@pytest.fixture
def initial_dataset(client, rand_gen):
dataset = client.create_dataset(name=rand_gen(str))
# Handle invalid default IAM integrations in test environments gracefully
dataset = create_dataset_robust(client, name=rand_gen(str))
yield dataset

dataset.delete()


@pytest.fixture
def video_data(client, rand_gen, video_data_row, wait_for_data_row_processing):
dataset = client.create_dataset(name=rand_gen(str))
# Handle invalid default IAM integrations in test environments gracefully
dataset = create_dataset_robust(client, name=rand_gen(str))
data_row_ids = []
data_row = dataset.create_data_row(video_data_row)
data_row = wait_for_data_row_processing(client, data_row)
@@ -884,7 +888,8 @@ def create_video_data_row(rand_gen):

@pytest.fixture
def video_data_100_rows(client, rand_gen, wait_for_data_row_processing):
dataset = client.create_dataset(name=rand_gen(str))
# Handle invalid default IAM integrations in test environments gracefully
dataset = create_dataset_robust(client, name=rand_gen(str))
data_row_ids = []
for _ in range(100):
data_row = dataset.create_data_row(create_video_data_row(rand_gen))
@@ -1276,3 +1281,29 @@ def module_teardown_helpers():
@pytest.fixture
def label_helpers():
return LabelHelpers()


def create_dataset_robust(client, **kwargs):
"""
Robust dataset creation that handles invalid default IAM integrations gracefully.

This is a helper function for tests that need to create datasets directly
instead of using fixtures. It falls back to creating datasets without
IAM integration when the default integration is invalid.

Args:
client: Labelbox client instance
**kwargs: Arguments to pass to create_dataset

Returns:
Dataset: Created dataset
"""
try:
return client.create_dataset(**kwargs)
except ValueError as e:
if "Integration is not valid" in str(e):
# Fallback to creating dataset without IAM integration for tests
kwargs["iam_integration"] = None
return client.create_dataset(**kwargs)
else:
            raise
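As a minimal usage sketch (illustrative, not part of the diff; the test name and assertion are hypothetical), the helper is called exactly like client.create_dataset:

```python
import uuid


def test_create_dataset_example(client):
    # Same call shape as client.create_dataset; the helper falls back to
    # iam_integration=None if the default integration is invalid.
    dataset = create_dataset_robust(client, name=f"example-{uuid.uuid4()}")
    try:
        assert dataset.name.startswith("example-")
    finally:
        # Fixtures in this suite delete the datasets they create.
        dataset.delete()
```

Falling back only on the specific "Integration is not valid" ValueError keeps other failures loud, which is why the helper re-raises everything else.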
9 changes: 5 additions & 4 deletions libs/labelbox/tests/data/annotation_import/conftest.py
@@ -11,6 +11,7 @@
from labelbox.schema.model_run import ModelRun
from labelbox.schema.ontology import Ontology
from labelbox.schema.project import Project
from ...conftest import create_dataset_robust

"""
The main fixtures of this library are configured_project and configured_project_by_global_key. Both fixtures generate data rows with a parametrized media type. They create a number of data rows equal to the DATA_ROW_COUNT variable below. The data rows are generated with a factory fixture that returns a function that allows you to pass a global key. The ontologies are generated normalized and based on the given MediaType (i.e., only features supported by that MediaType are created). This ontology is later used to obtain the correct annotations with the prediction_id_mapping and corresponding inferences. Each data row will have all possible supported annotations for its MediaType attached.

@@ -653,7 +654,7 @@ def _create_response_creation_project(
Expand Down Expand Up @@ -653,7 +654,7 @@ def _create_response_creation_project(
) -> Tuple[Project, Ontology, Dataset]:
"For response creation projects"

dataset = client.create_dataset(name=rand_gen(str))
dataset = create_dataset_robust(client, name=rand_gen(str))

project = client.create_response_creation_project(
name=f"{ontology_kind}-{rand_gen(str)}"
@@ -695,7 +696,7 @@ def _create_response_creation_project(
def llm_prompt_response_creation_dataset_with_data_row(
client: Client, rand_gen
):
dataset = client.create_dataset(name=rand_gen(str))
dataset = create_dataset_robust(client, name=rand_gen(str))
global_key = str(uuid.uuid4())

convo_data = {
@@ -752,7 +753,7 @@ def _create_prompt_response_project(
def _create_offline_mmc_project(
client: Client, rand_gen, data_row_json, normalized_ontology
) -> Tuple[Project, Ontology, Dataset]:
dataset = client.create_dataset(name=rand_gen(str))
dataset = create_dataset_robust(client, name=rand_gen(str))

project = client.create_offline_model_evaluation_project(
name=f"offline-mmc-{rand_gen(str)}",
@@ -797,7 +798,7 @@ def _create_project(
) -> Tuple[Project, Ontology, Dataset]:
"""Shared function to configure project for integration tests"""

dataset = client.create_dataset(name=rand_gen(str))
dataset = create_dataset_robust(client, name=rand_gen(str))

project = client.create_project(
name=f"{media_type}-{rand_gen(str)}", media_type=media_type
@@ -22,6 +22,7 @@
)
from labelbox.data.serialization.ndjson import NDJsonConverter
import pytest
from ...conftest import create_dataset_robust


def validate_iso_format(date_string: str):
@@ -163,7 +164,7 @@ def configured_project(

dataset = None

dataset = client.create_dataset(name=rand_gen(str))
dataset = create_dataset_robust(client, name=rand_gen(str))

project = client.create_project(
name=f"{media_type}-{rand_gen(str)}", media_type=media_type
8 changes: 6 additions & 2 deletions libs/labelbox/tests/integration/test_batch.py
@@ -10,10 +10,14 @@
)

from labelbox import Dataset, Project
from ..conftest import create_dataset_robust


def get_data_row_ids(ds: Dataset):
return [dr.uid for dr in list(ds.data_rows())]
export_task = ds.export()
export_task.wait_till_done()
stream = export_task.get_buffered_stream()
return [dr.json["data_row"]["id"] for dr in stream]
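For context, a sketch (assumed shape, not taken from the diff) of one buffered-stream item's json payload; get_data_row_ids relies only on data_row.id:

```python
# Assumed shape of one export-stream item's `json` payload.
example_item_json = {
    "data_row": {
        "id": "clxyzexampledatarowid",  # what get_data_row_ids extracts
        # Other keys (e.g. row_data, global_key) are assumptions here.
    },
}
```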


def test_create_batch(project: Project, big_dataset_data_row_ids: List[str]):
@@ -243,7 +247,7 @@ def test_list_all_batches(project: Project, client, image_url: str):
datasets = []

for assets in data:
dataset = client.create_dataset(name=str(uuid4()))
dataset = create_dataset_robust(client, name=str(uuid4()))
create_data_rows_task = dataset.create_data_rows(assets)
create_data_rows_task.wait_till_done()
datasets.append(dataset)
10 changes: 7 additions & 3 deletions libs/labelbox/tests/integration/test_dataset.py
@@ -8,12 +8,13 @@
from labelbox.schema.internal.descriptor_file_creator import (
DescriptorFileCreator,
)
from ..conftest import create_dataset_robust


def test_dataset(client, rand_gen):
# confirm dataset can be created
name = rand_gen(str)
dataset = client.create_dataset(name=name)
dataset = create_dataset_robust(client, name=name)
assert dataset.name == name
assert dataset.created_by() == client.get_user()
assert dataset.organization() == client.get_organization()
@@ -52,11 +53,14 @@ def test_dataset(client, rand_gen):
def dataset_for_filtering(client, rand_gen):
name_1 = rand_gen(str)
name_2 = rand_gen(str)
d1 = client.create_dataset(name=name_1)
d2 = client.create_dataset(name=name_2)
d1 = create_dataset_robust(client, name=name_1)
d2 = create_dataset_robust(client, name=name_2)

yield name_1, d1, name_2, d2

d1.delete()
d2.delete()


def test_dataset_filtering(client, dataset_for_filtering):
name_1, d1, name_2, d2 = dataset_for_filtering
25 changes: 15 additions & 10 deletions libs/labelbox/tests/integration/test_delegated_access.py
@@ -11,6 +11,7 @@
GcpIamIntegrationSettings,
AzureIamIntegrationSettings,
)
from ..conftest import create_dataset_robust


def delete_iam_integration(client, iam_integration_id: str):
@@ -382,7 +383,7 @@ def test_default_integration():
Org ID: cl269lvvj78b50zau34s4550z
Email: jtso+gcp_sdk_tests@labelbox.com"""
client = Client(api_key=os.environ.get("DA_GCP_LABELBOX_API_KEY"))
ds = client.create_dataset(name="new_ds")
ds = create_dataset_robust(client, name="new_ds")
dr = ds.create_data_row(
row_data="gs://jtso-gcs-sdk-da-tests/nikita-samokhin-D6QS6iv_CTY-unsplash.jpg"
)
@@ -414,7 +415,9 @@ def test_non_default_integration():
inte for inte in integrations if "aws-da-test-bucket" in inte.name
][0]
assert integration.valid
ds = client.create_dataset(iam_integration=integration, name="new_ds")
ds = create_dataset_robust(
client, iam_integration=integration, name="new_ds"
)
assert ds.iam_integration().name == "aws-da-test-bucket"
dr = ds.create_data_row(
row_data="https://jtso-aws-da-sdk-tests.s3.us-east-2.amazonaws.com/adrian-yu-qkN4D3Rf1gw-unsplash.jpg"
@@ -424,7 +427,7 @@


def test_no_integration(client, image_url):
ds = client.create_dataset(iam_integration=None, name="new_ds")
ds = create_dataset_robust(client, iam_integration=None, name="new_ds")
assert ds.iam_integration() is None
dr = ds.create_data_row(row_data=image_url)
assert requests.get(dr.row_data).status_code == 200
Expand All @@ -433,7 +436,7 @@ def test_no_integration(client, image_url):

@pytest.mark.skip(reason="Assumes state of account doesn't have integration")
def test_no_default_integration(client):
ds = client.create_dataset(name="new_ds")
ds = create_dataset_robust(client, name="new_ds")
assert ds.iam_integration() is None
ds.delete()

@@ -466,8 +469,8 @@ def test_add_integration_from_object():
if "aws-da-test-bucket" in integration.name
][0]

ds = client.create_dataset(
iam_integration=None, name=f"integration_add_obj-{uuid.uuid4()}"
ds = create_dataset_robust(
client, iam_integration=None, name=f"integration_add_obj-{uuid.uuid4()}"
)

# Test set integration with object
@@ -506,8 +509,8 @@ def test_add_integration_from_uid():
if "aws-da-test-bucket" in integration.name
][0]

ds = client.create_dataset(
iam_integration=None, name=f"integration_add_id-{uuid.uuid4()}"
ds = create_dataset_robust(
client, iam_integration=None, name=f"integration_add_id-{uuid.uuid4()}"
)

# Test set integration with integration id
@@ -552,8 +555,10 @@ def test_integration_remove():
if "aws-da-test-bucket" in integration.name
][0]

ds = client.create_dataset(
iam_integration=integration, name=f"integration_remove-{uuid.uuid4()}"
ds = create_dataset_robust(
client,
iam_integration=integration,
name=f"integration_remove-{uuid.uuid4()}",
)

# Test unset integration
5 changes: 3 additions & 2 deletions libs/labelbox/tests/integration/test_pagination.py
@@ -3,13 +3,14 @@
import pytest

from labelbox.schema.dataset import Dataset
from ..conftest import create_dataset_robust


@pytest.fixture
def data_for_dataset_order_test(client, rand_gen):
name = rand_gen(str)
dataset1 = client.create_dataset(name=name)
dataset2 = client.create_dataset(name=name)
dataset1 = create_dataset_robust(client, name=name)
dataset2 = create_dataset_robust(client, name=name)

yield name
