From a25a09ea61e911b8e2f4410318e21fd1ad87f320 Mon Sep 17 00:00:00 2001 From: AserAbdelhakeem Date: Sun, 22 Jun 2025 16:59:31 +0200 Subject: [PATCH 1/3] edited datasetmanager tests so that they use mock objects instead of real datasets and used group to avoid race conditions in tests --- tests/test_dataset.py | 319 ++++++++++++++++++++++++++++++------------ 1 file changed, 226 insertions(+), 93 deletions(-) diff --git a/tests/test_dataset.py b/tests/test_dataset.py index e97a2764..8250d4a2 100644 --- a/tests/test_dataset.py +++ b/tests/test_dataset.py @@ -1,4 +1,4 @@ -"""This code performs several tests for the dataset module. +"""This code performs several tests for the dataset module. """ from __future__ import annotations @@ -7,32 +7,68 @@ import os import zipfile from importlib.util import find_spec +from pathlib import Path +from unittest.mock import patch import pytest -from sed.core.config import USER_CONFIG_PATH from sed.dataset import dataset as ds from sed.dataset import DatasetsManager as dm package_dir = os.path.dirname(find_spec("sed").origin) json_path = os.path.join(package_dir, "config/datasets.json") +# cspell:ignore pytestmark xdist +pytestmark = pytest.mark.xdist_group(name="dataset_tests") + @pytest.fixture def zip_buffer(): zip_buffer = io.BytesIO() with zipfile.ZipFile(zip_buffer, "a", zipfile.ZIP_DEFLATED) as zip_file: zip_file.writestr("test_file.txt", "This is a test file inside the zip.") - # add a subdir zip_file.writestr("subdir/test_subdir.txt", "This is a test file inside the subdir.") return zip_buffer @pytest.fixture -def zip_file(fs, zip_buffer): - fs.create_dir("test/datasets/Test") - with open("test/datasets/Test/Test.zip", "wb") as f: - f.write(zip_buffer.getvalue()) +def zip_file(tmp_path, zip_buffer): + test_dir = tmp_path / "datasets" / "Test" + test_dir.mkdir(parents=True) + zip_path = test_dir / "Test.zip" + zip_path.write_bytes(zip_buffer.getvalue()) + return zip_path + + +@pytest.fixture +def mock_dataset_paths(tmp_path): + tmp_path = Path(tmp_path) + + user_config = tmp_path / "user_datasets.json" + folder_config = tmp_path / "folder_datasets.json" + + with patch.object(ds, "_dir", str(tmp_path / "datasets" / "Test")), patch( + "sed.core.config.USER_CONFIG_PATH", + tmp_path, + ), patch.object( + dm, + "json_path", + {"user": str(user_config), "module": json_path, "folder": str(folder_config)}, + ): + yield {"user": user_config, "folder": folder_config, "tmp_path": tmp_path} + + +@pytest.fixture +def sample_dataset_config(): + return { + "Test": { + "url": "http://test.com/files/file.zip", + "subdirs": ["subdir"], + "data_path": [], + "files": [], + }, + "TestSimple": {"url": "http://test.com/simple.zip", "data_path": [], "files": []}, + } def test_available_datasets(): @@ -54,123 +90,220 @@ def test_check_dataset_availability(): ds._check_dataset_availability() -def test_set_root_dir(): - # test with existing data path - ds.data_name = "Test" - ds._state["data_path"] = ["test/data"] - ds._set_data_dir(root_dir="test/data", use_existing=True) - assert os.path.abspath("test/data/") == ds._dir - - # test without existing data path - ds._state["data_path"] = [] - ds._set_data_dir(root_dir="test/data", use_existing=True) - assert os.path.abspath("test/data/datasets/Test") == ds._dir - - # test without data path and existing data path - ds._set_data_dir(root_dir=None, use_existing=True) - assert os.path.abspath("./datasets/Test") == ds._dir - - # test with provided data path different from existing data path - ds._state["data_path"] = ["test/data1"] - ds._set_data_dir(root_dir="test/data", use_existing=True) - assert os.path.abspath("test/data1/") == ds._dir - ds._set_data_dir(root_dir="test/data", use_existing=False) - assert os.path.abspath("test/data/datasets/Test") == ds._dir - - -def test_get_file_list(fs): - fs.create_file("test/data/file.txt") - fs.create_file("test/data/subdir/file.txt") - fs.create_file("test/data/subdir/file.zip") - fs.create_file("test/data/file.zip") - ds._dir = "test/data" - assert ["file.txt", "subdir/file.txt"] == ds._get_file_list() - - assert ["file.txt", "file.zip", "subdir/file.txt", "subdir/file.zip"] == ds._get_file_list( - ignore_zip=False, - ) - +def test_set_root_dir(mock_dataset_paths, sample_dataset_config): + """Test _set_data_dir with proper path mocking.""" + user_config = mock_dataset_paths["user"] + tmp_path = mock_dataset_paths["tmp_path"] + + # Write sample config to temporary file + user_config.write_text(json.dumps(sample_dataset_config)) + + with patch.object(ds, "_datasets", sample_dataset_config): + # test with existing data path + ds.data_name = "Test" + test_data_path = str(tmp_path / "test" / "data") + ds._state["data_path"] = [test_data_path] + ds._set_data_dir(root_dir=test_data_path, use_existing=True) + assert os.path.abspath(test_data_path + "/") == ds._dir + + # test without existing data path + ds._state["data_path"] = [] + ds._set_data_dir(root_dir=str(tmp_path / "test" / "data"), use_existing=True) + expected_dir = str(tmp_path / "test" / "data" / "datasets" / "Test") + assert os.path.abspath(expected_dir) == ds._dir + + # Additional tests using temporary paths + ds._state["data_path"] = [] + ds._set_data_dir(root_dir=None, use_existing=True) + # This should use the mocked _dir path, not real "./datasets/Test" + with patch("os.getcwd", return_value=str(tmp_path)): + ds._set_data_dir(root_dir=None, use_existing=True) + expected_mock_dir = str(tmp_path / "datasets" / "Test") + assert ds._dir == expected_mock_dir + + # Test with different provided path vs existing path + test_data_path1 = str(tmp_path / "test" / "data1") + test_data_path2 = str(tmp_path / "test" / "data2") + ds._state["data_path"] = [test_data_path1] + ds._set_data_dir(root_dir=test_data_path2, use_existing=True) + assert os.path.abspath(test_data_path1 + "/") == ds._dir + ds._set_data_dir(root_dir=test_data_path2, use_existing=False) + expected_dir = str(tmp_path / "test" / "data2" / "datasets" / "Test") + assert os.path.abspath(expected_dir) == ds._dir + + +def test_get_file_list(tmp_path): + tmp_path = Path(tmp_path) + + test_dir = tmp_path / "test" / "data" + test_dir.mkdir(parents=True, exist_ok=True) + + (test_dir / "file.txt").write_text("content") + (test_dir / "file.zip").write_text("zip content") + + subdir = test_dir / "subdir" + subdir.mkdir(exist_ok=True) + (subdir / "file.txt").write_text("content") + (subdir / "file.zip").write_text("zip content") + + ds._dir = str(test_dir) + assert sorted(ds._get_file_list()) == ["file.txt", "subdir/file.txt"] + assert sorted(ds._get_file_list(ignore_zip=False)) == [ + "file.txt", + "file.zip", + "subdir/file.txt", + "subdir/file.zip", + ] + + +def test_download_data( + tmp_path, + requests_mock, + zip_buffer, + mock_dataset_paths, + sample_dataset_config, +): + tmp_path = Path(tmp_path) + + user_config = mock_dataset_paths["user"] + user_config.write_text(json.dumps(sample_dataset_config)) + + test_dir = tmp_path / "test" + test_dir.mkdir(exist_ok=True) -def test_download_data(fs, requests_mock, zip_buffer): - fs.create_dir("test") data_url = "http://test.com/files/file.zip" requests_mock.get(data_url, content=zip_buffer.getvalue()) - ds._data_name = "Test" - ds._state = {"data_path": []} - ds._set_data_dir(root_dir="test", use_existing=True) - ds._download_data(data_url) - assert os.path.exists("test/datasets/Test/Test.zip") + + with patch.object(ds, "_datasets", sample_dataset_config): + ds._data_name = "Test" + ds._state = {"data_path": []} + ds._set_data_dir(root_dir=str(test_dir), use_existing=True) + ds._download_data(data_url) + + expected_path = test_dir / "datasets" / "Test" / "Test.zip" + assert expected_path.exists() # assert not ds._download_data("data", "test/data/", data_url) # already exists -def test_extract_data(zip_file): # noqa: ARG001 - ds._data_name = "Test" - ds._dir = "test/datasets/Test/" - ds._extract_data() - assert os.path.exists("test/datasets/Test/test_file.txt") - assert os.path.exists("test/datasets/Test/subdir/test_subdir.txt") +def test_extract_data(tmp_path, zip_buffer, mock_dataset_paths, sample_dataset_config): + """Test extraction with proper isolation.""" + tmp_path = Path(tmp_path) + + user_config = mock_dataset_paths["user"] + user_config.write_text(json.dumps(sample_dataset_config)) + + test_dir = tmp_path / "test" / "datasets" / "Test" + test_dir.mkdir(parents=True, exist_ok=True) + + # Create zip file in test directory + zip_path = test_dir / "Test.zip" + zip_path.write_bytes(zip_buffer.getvalue()) + with patch.object(ds, "_datasets", sample_dataset_config): + ds._data_name = "Test" + ds._dir = str(test_dir) + ds._extract_data() -def test_rearrange_data(zip_file): # noqa: ARG001 - ds._data_name = "Test" - ds._dir = "test/datasets/Test/" - ds._subdirs = ["subdir"] - ds._extract_data() - ds._rearrange_data() - assert os.path.exists("test/datasets/Test/test_file.txt") - assert os.path.exists("test/datasets/Test/test_subdir.txt") - assert not os.path.exists("test/datasets/Test/subdir") + assert (test_dir / "test_file.txt").exists() + assert (test_dir / "subdir" / "test_subdir.txt").exists() - with pytest.raises(FileNotFoundError): - ds._subdirs = ["non_existing_subdir"] + +def test_rearrange_data(tmp_path, zip_buffer, mock_dataset_paths, sample_dataset_config): + """Test rearrangement with proper isolation.""" + tmp_path = Path(tmp_path) + + user_config = mock_dataset_paths["user"] + user_config.write_text(json.dumps(sample_dataset_config)) + + test_dir = tmp_path / "test" / "datasets" / "Test" + test_dir.mkdir(parents=True, exist_ok=True) + + zip_path = test_dir / "Test.zip" + zip_path.write_bytes(zip_buffer.getvalue()) + + with patch.object(ds, "_datasets", sample_dataset_config): + ds._data_name = "Test" + ds._dir = str(test_dir) + ds._subdirs = ["subdir"] + ds._extract_data() ds._rearrange_data() + assert (test_dir / "test_file.txt").exists() + assert (test_dir / "test_subdir.txt").exists() + assert not (test_dir / "subdir").exists() + + with pytest.raises(FileNotFoundError): + ds._subdirs = ["non_existing_subdir"] + ds._rearrange_data() -def test_get_remove_dataset(requests_mock, zip_buffer): - json_path_user = USER_CONFIG_PATH.joinpath("datasets.json") - data_name = "Test" - _ = dm.load_datasets_dict() # to ensure datasets.json is in user dir - ds.remove(data_name) +def test_get_remove_dataset( + tmp_path, + requests_mock, + zip_buffer, + mock_dataset_paths, + sample_dataset_config, +): + tmp_path = Path(tmp_path) + + user_config = mock_dataset_paths["user"] + user_config.write_text(json.dumps(sample_dataset_config)) data_url = "http://test.com/files/file.zip" requests_mock.get(data_url, content=zip_buffer.getvalue()) - ds.get(data_name) - assert ds.dir == os.path.abspath(os.path.join("./datasets", data_name)) + with patch.object(ds, "_datasets", sample_dataset_config): + data_name = "Test" + + ds.remove(data_name) - # check if subdir is removed after rearranging - assert not os.path.exists("./datasets/Test/subdir") + ds.get(data_name, root_dir=str(tmp_path), use_existing=False) - # check datasets file to now have data_path listed - datasets_json = json.load(open(json_path_user)) - assert datasets_json[data_name]["data_path"] - assert datasets_json[data_name]["files"] - ds.remove(data_name) + expected_dir = tmp_path / "datasets" / "Test" + assert ds.dir == str(expected_dir) - assert not os.path.exists(os.path.join("./datasets", data_name)) + # Check if subdir is removed after rearranging (if subdirs are configured) + assert not (expected_dir / "subdir").exists() - ds.get(data_name) - ds.get(data_name) - ds.remove(data_name, ds.existing_data_paths[0]) + # Check datasets file to now have data_path listed + datasets_json = json.loads(user_config.read_text()) + assert datasets_json[data_name]["data_path"] + assert datasets_json[data_name]["files"] + ds.remove(data_name) + assert not expected_dir.exists() + ds.get(data_name, root_dir=str(tmp_path), use_existing=False) + ds.get(data_name, root_dir=str(tmp_path), use_existing=False) + + if hasattr(ds, "existing_data_paths") and ds.existing_data_paths: + ds.remove(data_name, ds.existing_data_paths[0]) + else: + ds.remove(data_name) + + +def test_datasets_manager(mock_dataset_paths): + """Test dataset manager with proper isolation.""" + user_config = mock_dataset_paths["user"] + folder_config = mock_dataset_paths["folder"] -def test_datasets_manager(): dm.add( "Test_DM", {"url": "http://test.com/files/file.zip", "subdirs": ["subdir"]}, levels=["folder", "user"], ) - datasets_json = json.load(open(dm.json_path["folder"])) - assert datasets_json["Test_DM"] - assert datasets_json["Test_DM"]["url"] == "http://test.com/files/file.zip" - assert datasets_json["Test_DM"]["subdirs"] == ["subdir"] + # Check configurations were written to temporary files + folder_data = json.loads(folder_config.read_text()) + assert "Test_DM" in folder_data + + user_data = json.loads(user_config.read_text()) + assert "Test_DM" in user_data + + # Test removal dm.remove("Test_DM", levels=["folder"]) - datasets_json = json.load(open(dm.json_path["folder"])) - with pytest.raises(KeyError): - datasets_json["Test_DM"] + folder_data = json.loads(folder_config.read_text()) + assert "Test_DM" not in folder_data datasets_json = json.load(open(dm.json_path["user"])) assert datasets_json["Test_DM"] From 8526eba65ab807149628e2060059c7dfc7ed8342 Mon Sep 17 00:00:00 2001 From: AserAbdelhakeem Date: Fri, 27 Jun 2025 18:17:10 +0200 Subject: [PATCH 2/3] redundant line removed --- tests/test_dataset.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/test_dataset.py b/tests/test_dataset.py index 8250d4a2..383b2c84 100644 --- a/tests/test_dataset.py +++ b/tests/test_dataset.py @@ -113,9 +113,6 @@ def test_set_root_dir(mock_dataset_paths, sample_dataset_config): assert os.path.abspath(expected_dir) == ds._dir # Additional tests using temporary paths - ds._state["data_path"] = [] - ds._set_data_dir(root_dir=None, use_existing=True) - # This should use the mocked _dir path, not real "./datasets/Test" with patch("os.getcwd", return_value=str(tmp_path)): ds._set_data_dir(root_dir=None, use_existing=True) expected_mock_dir = str(tmp_path / "datasets" / "Test") From 6b9ec15fe119ee9a9a87aa32086b4ce526c21fb9 Mon Sep 17 00:00:00 2001 From: AserAbdelhakeem Date: Fri, 27 Jun 2025 18:20:20 +0200 Subject: [PATCH 3/3] removed grouping --- tests/test_dataset.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/test_dataset.py b/tests/test_dataset.py index 383b2c84..03b58bb5 100644 --- a/tests/test_dataset.py +++ b/tests/test_dataset.py @@ -18,9 +18,6 @@ package_dir = os.path.dirname(find_spec("sed").origin) json_path = os.path.join(package_dir, "config/datasets.json") -# cspell:ignore pytestmark xdist -pytestmark = pytest.mark.xdist_group(name="dataset_tests") - @pytest.fixture def zip_buffer():