From f489698b2818c6ae345278ae9c8e5b695c71681c Mon Sep 17 00:00:00 2001
From: morrisnein
Date: Thu, 26 Jan 2023 18:48:42 +0300
Subject: [PATCH] add tests for defining index

---
Notes (reviewer commentary; not part of the commit message or the diff):
  * test/data/dummy.csv is a three-row fixture with the header a,b,c.
  * The new tests call get_df_from_csv on that fixture and assert the
    resulting index name, index values, column labels and cell values for
    three cases: the index resolves to 'a', the index resolves to 'b'
    (with columns_to_use=['b', 'c']), and no index column is chosen
    (index 0..2, all three columns kept) when the only keyword is 'some'.
  * Indentation of continuation lines inside the hunks was reconstructed
    from a whitespace-collapsed copy; confirm against the original commit
    before relying on the blob hashes in the index lines.

 test/data/dummy.csv         |  4 ++++
 test/unit/data/test_data.py | 37 ++++++++++++++++++++++++++++++++++++-
 2 files changed, 40 insertions(+), 1 deletion(-)
 create mode 100644 test/data/dummy.csv

diff --git a/test/data/dummy.csv b/test/data/dummy.csv
new file mode 100644
index 0000000000..fcc2ae5463
--- /dev/null
+++ b/test/data/dummy.csv
@@ -0,0 +1,4 @@
+a,b,c
+1,4,7
+2,5,8
+3,6,9
diff --git a/test/unit/data/test_data.py b/test/unit/data/test_data.py
index 2903802b71..1670c971a4 100644
--- a/test/unit/data/test_data.py
+++ b/test/unit/data/test_data.py
@@ -6,7 +6,7 @@
 import pytest
 from sklearn.datasets import load_iris
 
-from fedot.core.data.data import InputData
+from fedot.core.data.data import InputData, get_df_from_csv
 from fedot.core.pipelines.node import PrimaryNode
 from fedot.core.pipelines.pipeline import Pipeline
 from fedot.core.repository.dataset_types import DataTypesEnum
@@ -207,3 +207,38 @@ def test_data_convert_dt_indexes_correct():
     assert np.all(train_data.supplementary_data.non_int_idx == old_train_data_idx)
     assert np.all(train_pred_data.supplementary_data.non_int_idx == old_train_pred_data_idx)
     assert np.all(test_data.supplementary_data.non_int_idx == old_test_data_idx)
+
+
+@pytest.mark.parametrize('columns_to_use, possible_idx_keywords',
+                         [
+                             (None, ['b', 'c', 'a', 'some']),
+                             (['b', 'c'], ['a', 'some'])
+                         ])
+def test_define_index_from_csv_with_first_index_column(columns_to_use, possible_idx_keywords):
+    dummy_csv_path = fedot_project_root().joinpath('test/data/dummy.csv')
+    df = get_df_from_csv(dummy_csv_path, delimiter=',',
+                         columns_to_use=columns_to_use, possible_idx_keywords=possible_idx_keywords)
+    assert df.index.name == 'a'
+    assert np.array_equal(df.index, [1, 2, 3])
+    assert np.array_equal(df.columns, ['b', 'c'])
+    assert np.array_equal(df, list(zip([4, 5, 6], [7, 8, 9])))
+
+
+def test_define_index_from_csv_with_non_first_index_column():
+    dummy_csv_path = fedot_project_root().joinpath('test/data/dummy.csv')
+    df = get_df_from_csv(dummy_csv_path, delimiter=',', columns_to_use=['b', 'c'],
+                         possible_idx_keywords=['a', 'b', 'c', 'some'])
+    assert df.index.name == 'b'
+    assert np.array_equal(df.index, [4, 5, 6])
+    assert np.array_equal(df.columns, ['c'])
+    assert np.array_equal(df, [[7], [8], [9]])
+
+
+def test_define_index_from_csv_without_index_column():
+    dummy_csv_path = fedot_project_root().joinpath('test/data/dummy.csv')
+    df = get_df_from_csv(dummy_csv_path, delimiter=',',
+                         possible_idx_keywords=['some'])
+    assert df.index.name is None
+    assert np.array_equal(df.index, [0, 1, 2])
+    assert np.array_equal(df.columns, ['a', 'b', 'c'])
+    assert np.array_equal(df, list(zip([1, 2, 3], [4, 5, 6], [7, 8, 9])))