From d594a9361f3fff37af341e83546fdb6af8ae611f Mon Sep 17 00:00:00 2001 From: Vitali Yanushchyk Date: Fri, 31 May 2024 09:38:52 -0400 Subject: [PATCH] chg ! optimize find duplicates, tests --- .../apps/faces/utils/duplication_detector.py | 15 ++++--- tests/faces/faces_const.py | 3 +- tests/faces/fixtures/duplication_detector.py | 14 +++++-- tests/faces/test_duplication_detector.py | 37 ++++++++++++++--- tests/faces/test_validators.py | 41 +++++++++++++++++++ 5 files changed, 95 insertions(+), 15 deletions(-) create mode 100644 tests/faces/test_validators.py diff --git a/src/hope_dedup_engine/apps/faces/utils/duplication_detector.py b/src/hope_dedup_engine/apps/faces/utils/duplication_detector.py index 5d9fad5b..4418f84f 100644 --- a/src/hope_dedup_engine/apps/faces/utils/duplication_detector.py +++ b/src/hope_dedup_engine/apps/faces/utils/duplication_detector.py @@ -1,7 +1,7 @@ import logging import os import re -from typing import Dict, List, Tuple +from typing import Dict, List, Set, Tuple from django.conf import settings @@ -43,8 +43,7 @@ def __init__(self, filename: str) -> None: self.filename: str = filename self.encodings_filename: str = f"{self.filename}.npy" self.scale_factor: float = config.SCALE_FACTOR - # self.mean_values: Tuple[float, float, float] = tuple(map(float, config.MEAN_VALUES.split(", "))) - self.mean_values: Tuple[float, float, float] = config.MEAN_VALUES + self.mean_values: Tuple[float, float, float] = tuple(map(float, config.MEAN_VALUES.split(", "))) self.face_detection_confidence: float = config.FACE_DETECTION_CONFIDENCE self.face_detection_model: str = config.FACE_DETECTION_MODEL self.distance_threshold: float = config.DISTANCE_THRESHOLD @@ -161,26 +160,30 @@ def find_duplicates(self) -> Tuple[str]: Returns: Tuple[str]: A tuple of filenames of duplicate images. """ - duplicated_images = set() + duplicated_images: Set[str] = set() path1 = self.filename try: if not self.has_encodings: self._encode_face() - encodings_all = self._load_encodings_all() encodings1 = encodings_all[path1] + checked_pairs = set() for path2, encodings2 in encodings_all.items(): if path1 != path2: for encoding1 in encodings1: for encoding2 in encodings2: + if (path1, path2, tuple(encoding1), tuple(encoding2)) in checked_pairs: + continue + distance = face_recognition.face_distance([encoding1], encoding2) if distance < self.distance_threshold: duplicated_images.update([path1, path2]) break + + checked_pairs.add((path1, path2, tuple(encoding1), tuple(encoding2))) if path2 in duplicated_images: break - return tuple(duplicated_images) except Exception as e: self.logger.exception(f"Error finding duplicates for image {path1}", exc_info=e) diff --git a/tests/faces/faces_const.py b/tests/faces/faces_const.py index 0d7df597..1593ebfb 100644 --- a/tests/faces/faces_const.py +++ b/tests/faces/faces_const.py @@ -1,4 +1,5 @@ -from typing import Final +from typing import Dict, Final FILENAME: Final[str] = "test_file.jpg" FILENAMES: Final[list[str]] = ["test_file.jpg", "test_file2.jpg"] +DEPLOY_PROTO_SHAPE: Final[Dict[str, int]] = {"batch_size": 1, "channels": 3, "height": 300, "width": 300} diff --git a/tests/faces/fixtures/duplication_detector.py b/tests/faces/fixtures/duplication_detector.py index 6c662247..6a833e5d 100644 --- a/tests/faces/fixtures/duplication_detector.py +++ b/tests/faces/fixtures/duplication_detector.py @@ -1,6 +1,7 @@ from io import BytesIO -from unittest.mock import MagicMock, patch +from unittest.mock import MagicMock, mock_open, patch +import cv2 import numpy as np import pytest from PIL import Image @@ -12,11 +13,12 @@ @pytest.fixture -def dd(mock_hope_azure_storage, mock_cv2dnn_storage, mock_hde_azure_storage, db): +def dd(mock_hope_azure_storage, mock_cv2dnn_storage, mock_hde_azure_storage, mock_prototxt_file, db): with ( patch("hope_dedup_engine.apps.faces.utils.duplication_detector.CV2DNNStorage", mock_cv2dnn_storage), patch("hope_dedup_engine.apps.faces.utils.duplication_detector.HOPEAzureStorage", mock_hope_azure_storage), patch("hope_dedup_engine.apps.faces.utils.duplication_detector.HDEAzureStorage", mock_hde_azure_storage), + patch("builtins.open", mock_prototxt_file), ): mock_cv2dnn_storage.exists.return_value = False detector = DuplicationDetector(FILENAME) @@ -25,6 +27,12 @@ def dd(mock_hope_azure_storage, mock_cv2dnn_storage, mock_hde_azure_storage, db) return detector +@pytest.fixture +def mock_prototxt_file(): + content = "input_shape { dim: 1 dim: 3 dim: 300 dim: 300 }" + return mock_open(read_data=content) + + @pytest.fixture def mock_cv2dnn_storage(): storage = MagicMock(spec=CV2DNNStorage) @@ -80,7 +88,7 @@ def mock_open_context_manager(image_bytes_io): @pytest.fixture def mock_net(): - mock_net = MagicMock() # Mocking the neural network object + mock_net = MagicMock(spec=cv2.dnn_Net) # Mocking the neural network object mock_detections = np.array( [ [ diff --git a/tests/faces/test_duplication_detector.py b/tests/faces/test_duplication_detector.py index 8456bd90..c03a3ef5 100644 --- a/tests/faces/test_duplication_detector.py +++ b/tests/faces/test_duplication_detector.py @@ -7,7 +7,7 @@ import numpy as np import pytest from constance import config -from faces_const import FILENAME, FILENAMES +from faces_const import DEPLOY_PROTO_SHAPE, FILENAME, FILENAMES from hope_dedup_engine.apps.faces.utils.duplication_detector import DuplicationDetector @@ -19,12 +19,38 @@ def test_duplication_detector_initialization(dd): assert dd.distance_threshold == config.DISTANCE_THRESHOLD assert dd.filename == FILENAME assert dd.encodings_filename == f"{FILENAME}.npy" + assert dd.scale_factor == config.SCALE_FACTOR + assert dd.mean_values == tuple(map(float, config.MEAN_VALUES.split(", "))) + assert dd.face_detection_model == config.FACE_DETECTION_MODEL + assert dd.nms_threshold == config.NMS_THRESHOLD + assert dd.shape == DEPLOY_PROTO_SHAPE + + +def test_get_shape(dd, mock_prototxt_file): + with patch("builtins.open", mock_prototxt_file): + shape = dd._get_shape() + assert shape == DEPLOY_PROTO_SHAPE + + +def test_set_net(dd, mock_cv2dnn_storage, mock_net): + mock_net_instance, *_ = mock_net + with patch("cv2.dnn.readNetFromCaffe", return_value=mock_net_instance) as mock_read_net: + net = dd._set_net(mock_cv2dnn_storage) + mock_read_net.assert_called_once_with( + mock_cv2dnn_storage.path(settings.PROTOTXT_FILE), + mock_cv2dnn_storage.path(settings.CAFFEMODEL_FILE), + ) + + assert net == mock_net_instance + mock_net_instance.setPreferableBackend.assert_called_once_with(int(config.DNN_BACKEND)) + mock_net_instance.setPreferableTarget.assert_called_once_with(int(config.DNN_TARGET)) + for storage_name, storage in dd.storages.items(): assert isinstance(storage, MagicMock) if storage_name == "cv2dnn": storage.exists.assert_any_call(settings.PROTOTXT_FILE) storage.exists.assert_any_call(settings.CAFFEMODEL_FILE) - storage.path.assert_any_call(settings.CAFFEMODEL_FILE) + storage.path.assert_any_call(settings.PROTOTXT_FILE) storage.path.assert_any_call(settings.CAFFEMODEL_FILE) @@ -63,10 +89,8 @@ def test_get_face_detections_dnn_with_detections(dd, mock_net, mock_open_context patch.object(dd.storages["images"], "open", return_value=mock_open_context_manager), patch("cv2.imdecode", imdecode), patch("cv2.resize", resize), + patch.object(dd, "net", net), ): - - dd.net.setInput(blob) - dd.net = net face_regions = dd._get_face_detections_dnn() assert face_regions == expected_regions @@ -201,4 +225,7 @@ def test_find_duplicates_exception_handling(dd): except Exception: ... dd.logger.exception.assert_called_once() + args, kwargs = dd.logger.exception.call_args + assert args[0] == f"Error finding duplicates for image {dd.filename}" + assert isinstance(kwargs["exc_info"], Exception) dd.logger.reset_mock() diff --git a/tests/faces/test_validators.py b/tests/faces/test_validators.py new file mode 100644 index 00000000..79b3e0df --- /dev/null +++ b/tests/faces/test_validators.py @@ -0,0 +1,41 @@ +from django.forms import ValidationError + +import pytest + +from hope_dedup_engine.apps.faces.validators import MeanValuesTupleField + + +def test_to_python_valid_tuple(): + field = MeanValuesTupleField() + assert field.to_python("104.0, 177.0, 123.0") == (104.0, 177.0, 123.0) + + +def test_to_python_invalid_length(): + field = MeanValuesTupleField() + with pytest.raises(ValidationError) as exc_info: + field.to_python("104.0, 177.0") + assert "Enter a valid tuple of three float values separated by commas and spaces" in str(exc_info.value) + + +def test_to_python_value_out_of_range(): + field = MeanValuesTupleField() + with pytest.raises(ValidationError) as exc_info: + field.to_python("104.0, 177.0, 256.0") + assert "Each value must be between -255 and 255." in str(exc_info.value) + + +def test_to_python_non_numeric_value(): + field = MeanValuesTupleField() + with pytest.raises(ValidationError) as exc_info: + field.to_python("104.0, abc, 123.0") + assert "Enter a valid tuple of three float values separated by commas and spaces" in str(exc_info.value) + + +def test_prepare_value_with_tuple(): + field = MeanValuesTupleField() + assert field.prepare_value((104.0, 177.0, 123.0)) == "104.0, 177.0, 123.0" + + +def test_prepare_value_with_string(): + field = MeanValuesTupleField() + assert field.prepare_value("104.0, 177.0, 123.0") == "104.0, 177.0, 123.0"