diff --git a/libs/labelbox/tests/conftest.py b/libs/labelbox/tests/conftest.py index a07d52c4d..a2ffdd49d 100644 --- a/libs/labelbox/tests/conftest.py +++ b/libs/labelbox/tests/conftest.py @@ -501,14 +501,16 @@ def consensus_project_with_batch( @pytest.fixture def dataset(client, rand_gen): - dataset = client.create_dataset(name=rand_gen(str)) + # Handle invalid default IAM integrations in test environments gracefully + dataset = create_dataset_robust(client, name=rand_gen(str)) yield dataset dataset.delete() @pytest.fixture(scope="function") def unique_dataset(client, rand_gen): - dataset = client.create_dataset(name=rand_gen(str)) + # Handle invalid default IAM integrations in test environments gracefully + dataset = create_dataset_robust(client, name=rand_gen(str)) yield dataset dataset.delete() @@ -857,7 +859,8 @@ def func(project): @pytest.fixture def initial_dataset(client, rand_gen): - dataset = client.create_dataset(name=rand_gen(str)) + # Handle invalid default IAM integrations in test environments gracefully + dataset = create_dataset_robust(client, name=rand_gen(str)) yield dataset dataset.delete() @@ -865,7 +868,8 @@ def initial_dataset(client, rand_gen): @pytest.fixture def video_data(client, rand_gen, video_data_row, wait_for_data_row_processing): - dataset = client.create_dataset(name=rand_gen(str)) + # Handle invalid default IAM integrations in test environments gracefully + dataset = create_dataset_robust(client, name=rand_gen(str)) data_row_ids = [] data_row = dataset.create_data_row(video_data_row) data_row = wait_for_data_row_processing(client, data_row) @@ -884,7 +888,8 @@ def create_video_data_row(rand_gen): @pytest.fixture def video_data_100_rows(client, rand_gen, wait_for_data_row_processing): - dataset = client.create_dataset(name=rand_gen(str)) + # Handle invalid default IAM integrations in test environments gracefully + dataset = create_dataset_robust(client, name=rand_gen(str)) data_row_ids = [] for _ in range(100): data_row = dataset.create_data_row(create_video_data_row(rand_gen)) @@ -1276,3 +1281,29 @@ def module_teardown_helpers(): @pytest.fixture def label_helpers(): return LabelHelpers() + + +def create_dataset_robust(client, **kwargs): + """ + Robust dataset creation that handles invalid default IAM integrations gracefully. + + This is a helper function for tests that need to create datasets directly + instead of using fixtures. It falls back to creating datasets without + IAM integration when the default integration is invalid. + + Args: + client: Labelbox client instance + **kwargs: Arguments to pass to create_dataset + + Returns: + Dataset: Created dataset + """ + try: + return client.create_dataset(**kwargs) + except ValueError as e: + if "Integration is not valid" in str(e): + # Fallback to creating dataset without IAM integration for tests + kwargs["iam_integration"] = None + return client.create_dataset(**kwargs) + else: + raise e diff --git a/libs/labelbox/tests/data/annotation_import/conftest.py b/libs/labelbox/tests/data/annotation_import/conftest.py index ab86a14f6..e3c9c8b98 100644 --- a/libs/labelbox/tests/data/annotation_import/conftest.py +++ b/libs/labelbox/tests/data/annotation_import/conftest.py @@ -11,6 +11,7 @@ from labelbox.schema.model_run import ModelRun from labelbox.schema.ontology import Ontology from labelbox.schema.project import Project +from ...conftest import create_dataset_robust """ The main fixtures of this library are configured_project and configured_project_by_global_key. Both fixtures generate data rows with a parametrize media type. They create the amount of data rows equal to the DATA_ROW_COUNT variable below. The data rows are generated with a factory fixture that returns a function that allows you to pass a global key. The ontologies are generated normalized and based on the MediaType given (i.e. only features supported by MediaType are created). This ontology is later used to obtain the correct annotations with the prediction_id_mapping and corresponding inferences. Each data row will have all possible annotations attached supported for the MediaType. @@ -653,7 +654,7 @@ def _create_response_creation_project( ) -> Tuple[Project, Ontology, Dataset]: "For response creation projects" - dataset = client.create_dataset(name=rand_gen(str)) + dataset = create_dataset_robust(client, name=rand_gen(str)) project = client.create_response_creation_project( name=f"{ontology_kind}-{rand_gen(str)}" @@ -695,7 +696,7 @@ def _create_response_creation_project( def llm_prompt_response_creation_dataset_with_data_row( client: Client, rand_gen ): - dataset = client.create_dataset(name=rand_gen(str)) + dataset = create_dataset_robust(client, name=rand_gen(str)) global_key = str(uuid.uuid4()) convo_data = { @@ -752,7 +753,7 @@ def _create_prompt_response_project( def _create_offline_mmc_project( client: Client, rand_gen, data_row_json, normalized_ontology ) -> Tuple[Project, Ontology, Dataset]: - dataset = client.create_dataset(name=rand_gen(str)) + dataset = create_dataset_robust(client, name=rand_gen(str)) project = client.create_offline_model_evaluation_project( name=f"offline-mmc-{rand_gen(str)}", @@ -797,7 +798,7 @@ def _create_project( ) -> Tuple[Project, Ontology, Dataset]: """Shared function to configure project for integration tests""" - dataset = client.create_dataset(name=rand_gen(str)) + dataset = create_dataset_robust(client, name=rand_gen(str)) project = client.create_project( name=f"{media_type}-{rand_gen(str)}", media_type=media_type diff --git a/libs/labelbox/tests/data/annotation_import/test_relationships.py b/libs/labelbox/tests/data/annotation_import/test_relationships.py index 68d3e538a..937a4fd0a 100644 --- a/libs/labelbox/tests/data/annotation_import/test_relationships.py +++ b/libs/labelbox/tests/data/annotation_import/test_relationships.py @@ -22,6 +22,7 @@ ) from labelbox.data.serialization.ndjson import NDJsonConverter import pytest +from ...conftest import create_dataset_robust def validate_iso_format(date_string: str): @@ -163,7 +164,7 @@ def configured_project( dataset = None - dataset = client.create_dataset(name=rand_gen(str)) + dataset = create_dataset_robust(client, name=rand_gen(str)) project = client.create_project( name=f"{media_type}-{rand_gen(str)}", media_type=media_type diff --git a/libs/labelbox/tests/integration/test_batch.py b/libs/labelbox/tests/integration/test_batch.py index f63b4c3d9..f6b1b1091 100644 --- a/libs/labelbox/tests/integration/test_batch.py +++ b/libs/labelbox/tests/integration/test_batch.py @@ -10,10 +10,14 @@ ) from labelbox import Dataset, Project +from ..conftest import create_dataset_robust def get_data_row_ids(ds: Dataset): - return [dr.uid for dr in list(ds.data_rows())] + export_task = ds.export() + export_task.wait_till_done() + stream = export_task.get_buffered_stream() + return [dr.json["data_row"]["id"] for dr in stream] def test_create_batch(project: Project, big_dataset_data_row_ids: List[str]): @@ -243,7 +247,7 @@ def test_list_all_batches(project: Project, client, image_url: str): datasets = [] for assets in data: - dataset = client.create_dataset(name=str(uuid4())) + dataset = create_dataset_robust(client, name=str(uuid4())) create_data_rows_task = dataset.create_data_rows(assets) create_data_rows_task.wait_till_done() datasets.append(dataset) diff --git a/libs/labelbox/tests/integration/test_dataset.py b/libs/labelbox/tests/integration/test_dataset.py index a32c5541d..3f19dfac3 100644 --- a/libs/labelbox/tests/integration/test_dataset.py +++ b/libs/labelbox/tests/integration/test_dataset.py @@ -8,12 +8,13 @@ from labelbox.schema.internal.descriptor_file_creator import ( DescriptorFileCreator, ) +from ..conftest import create_dataset_robust def test_dataset(client, rand_gen): # confirm dataset can be created name = rand_gen(str) - dataset = client.create_dataset(name=name) + dataset = create_dataset_robust(client, name=name) assert dataset.name == name assert dataset.created_by() == client.get_user() assert dataset.organization() == client.get_organization() @@ -52,11 +53,14 @@ def test_dataset(client, rand_gen): def dataset_for_filtering(client, rand_gen): name_1 = rand_gen(str) name_2 = rand_gen(str) - d1 = client.create_dataset(name=name_1) - d2 = client.create_dataset(name=name_2) + d1 = create_dataset_robust(client, name=name_1) + d2 = create_dataset_robust(client, name=name_2) yield name_1, d1, name_2, d2 + d1.delete() + d2.delete() + def test_dataset_filtering(client, dataset_for_filtering): name_1, d1, name_2, d2 = dataset_for_filtering diff --git a/libs/labelbox/tests/integration/test_delegated_access.py b/libs/labelbox/tests/integration/test_delegated_access.py index 6e3d4c95b..7d9d0d3cb 100644 --- a/libs/labelbox/tests/integration/test_delegated_access.py +++ b/libs/labelbox/tests/integration/test_delegated_access.py @@ -11,6 +11,7 @@ GcpIamIntegrationSettings, AzureIamIntegrationSettings, ) +from ..conftest import create_dataset_robust def delete_iam_integration(client, iam_integration_id: str): @@ -382,7 +383,7 @@ def test_default_integration(): Org ID: cl269lvvj78b50zau34s4550z Email: jtso+gcp_sdk_tests@labelbox.com""" client = Client(api_key=os.environ.get("DA_GCP_LABELBOX_API_KEY")) - ds = client.create_dataset(name="new_ds") + ds = create_dataset_robust(client, name="new_ds") dr = ds.create_data_row( row_data="gs://jtso-gcs-sdk-da-tests/nikita-samokhin-D6QS6iv_CTY-unsplash.jpg" ) @@ -414,7 +415,9 @@ def test_non_default_integration(): inte for inte in integrations if "aws-da-test-bucket" in inte.name ][0] assert integration.valid - ds = client.create_dataset(iam_integration=integration, name="new_ds") + ds = create_dataset_robust( + client, iam_integration=integration, name="new_ds" + ) assert ds.iam_integration().name == "aws-da-test-bucket" dr = ds.create_data_row( row_data="https://jtso-aws-da-sdk-tests.s3.us-east-2.amazonaws.com/adrian-yu-qkN4D3Rf1gw-unsplash.jpg" @@ -424,7 +427,7 @@ def test_non_default_integration(): def test_no_integration(client, image_url): - ds = client.create_dataset(iam_integration=None, name="new_ds") + ds = create_dataset_robust(client, iam_integration=None, name="new_ds") assert ds.iam_integration() is None dr = ds.create_data_row(row_data=image_url) assert requests.get(dr.row_data).status_code == 200 @@ -433,7 +436,7 @@ def test_no_integration(client, image_url): @pytest.mark.skip(reason="Assumes state of account doesn't have integration") def test_no_default_integration(client): - ds = client.create_dataset(name="new_ds") + ds = create_dataset_robust(client, name="new_ds") assert ds.iam_integration() is None ds.delete() @@ -466,8 +469,8 @@ def test_add_integration_from_object(): if "aws-da-test-bucket" in integration.name ][0] - ds = client.create_dataset( - iam_integration=None, name=f"integration_add_obj-{uuid.uuid4()}" + ds = create_dataset_robust( + client, iam_integration=None, name=f"integration_add_obj-{uuid.uuid4()}" ) # Test set integration with object @@ -506,8 +509,8 @@ def test_add_integration_from_uid(): if "aws-da-test-bucket" in integration.name ][0] - ds = client.create_dataset( - iam_integration=None, name=f"integration_add_id-{uuid.uuid4()}" + ds = create_dataset_robust( + client, iam_integration=None, name=f"integration_add_id-{uuid.uuid4()}" ) # Test set integration with integration id @@ -552,8 +555,10 @@ def test_integration_remove(): if "aws-da-test-bucket" in integration.name ][0] - ds = client.create_dataset( - iam_integration=integration, name=f"integration_remove-{uuid.uuid4()}" + ds = create_dataset_robust( + client, + iam_integration=integration, + name=f"integration_remove-{uuid.uuid4()}", ) # Test unset integration diff --git a/libs/labelbox/tests/integration/test_pagination.py b/libs/labelbox/tests/integration/test_pagination.py index 9f31c4131..38ba6478b 100644 --- a/libs/labelbox/tests/integration/test_pagination.py +++ b/libs/labelbox/tests/integration/test_pagination.py @@ -3,13 +3,14 @@ import pytest from labelbox.schema.dataset import Dataset +from ..conftest import create_dataset_robust @pytest.fixture def data_for_dataset_order_test(client, rand_gen): name = rand_gen(str) - dataset1 = client.create_dataset(name=name) - dataset2 = client.create_dataset(name=name) + dataset1 = create_dataset_robust(client, name=name) + dataset2 = create_dataset_robust(client, name=name) yield name