Skip to content

Commit

Permalink
chg ! optimize find duplicates, tests
Browse files Browse the repository at this point in the history
  • Loading branch information
vitali-yanushchyk-valor committed May 31, 2024
1 parent 09adab6 commit d594a93
Show file tree
Hide file tree
Showing 5 changed files with 95 additions and 15 deletions.
15 changes: 9 additions & 6 deletions src/hope_dedup_engine/apps/faces/utils/duplication_detector.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging
import os
import re
from typing import Dict, List, Tuple
from typing import Dict, List, Set, Tuple

from django.conf import settings

Expand Down Expand Up @@ -43,8 +43,7 @@ def __init__(self, filename: str) -> None:
self.filename: str = filename
self.encodings_filename: str = f"{self.filename}.npy"
self.scale_factor: float = config.SCALE_FACTOR
# self.mean_values: Tuple[float, float, float] = tuple(map(float, config.MEAN_VALUES.split(", ")))
self.mean_values: Tuple[float, float, float] = config.MEAN_VALUES
self.mean_values: Tuple[float, float, float] = tuple(map(float, config.MEAN_VALUES.split(", ")))
self.face_detection_confidence: float = config.FACE_DETECTION_CONFIDENCE
self.face_detection_model: str = config.FACE_DETECTION_MODEL
self.distance_threshold: float = config.DISTANCE_THRESHOLD
Expand Down Expand Up @@ -161,26 +160,30 @@ def find_duplicates(self) -> Tuple[str]:
Returns:
Tuple[str]: A tuple of filenames of duplicate images.
"""
duplicated_images = set()
duplicated_images: Set[str] = set()
path1 = self.filename
try:
if not self.has_encodings:
self._encode_face()

encodings_all = self._load_encodings_all()
encodings1 = encodings_all[path1]

checked_pairs = set()
for path2, encodings2 in encodings_all.items():
if path1 != path2:
for encoding1 in encodings1:
for encoding2 in encodings2:
if (path1, path2, tuple(encoding1), tuple(encoding2)) in checked_pairs:
continue

distance = face_recognition.face_distance([encoding1], encoding2)
if distance < self.distance_threshold:
duplicated_images.update([path1, path2])
break

checked_pairs.add((path1, path2, tuple(encoding1), tuple(encoding2)))
if path2 in duplicated_images:
break

return tuple(duplicated_images)
except Exception as e:
self.logger.exception(f"Error finding duplicates for image {path1}", exc_info=e)
Expand Down
3 changes: 2 additions & 1 deletion tests/faces/faces_const.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from typing import Final
from typing import Dict, Final

# File name of the single image used when constructing the detector under test.
FILENAME: Final[str] = "test_file.jpg"
# Two image file names; the first entry is the same name as FILENAME.
FILENAMES: Final[list[str]] = ["test_file.jpg", "test_file2.jpg"]
# Shape mapping the tests compare against the detector's parsed network input shape.
# Annotated with the builtin generic ``dict`` so it is consistent with ``list[str]`` above.
DEPLOY_PROTO_SHAPE: Final[dict[str, int]] = {"batch_size": 1, "channels": 3, "height": 300, "width": 300}
14 changes: 11 additions & 3 deletions tests/faces/fixtures/duplication_detector.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from io import BytesIO
from unittest.mock import MagicMock, patch
from unittest.mock import MagicMock, mock_open, patch

import cv2
import numpy as np
import pytest
from PIL import Image
Expand All @@ -12,11 +13,12 @@


@pytest.fixture
def dd(mock_hope_azure_storage, mock_cv2dnn_storage, mock_hde_azure_storage, db):
def dd(mock_hope_azure_storage, mock_cv2dnn_storage, mock_hde_azure_storage, mock_prototxt_file, db):
with (
patch("hope_dedup_engine.apps.faces.utils.duplication_detector.CV2DNNStorage", mock_cv2dnn_storage),
patch("hope_dedup_engine.apps.faces.utils.duplication_detector.HOPEAzureStorage", mock_hope_azure_storage),
patch("hope_dedup_engine.apps.faces.utils.duplication_detector.HDEAzureStorage", mock_hde_azure_storage),
patch("builtins.open", mock_prototxt_file),
):
mock_cv2dnn_storage.exists.return_value = False
detector = DuplicationDetector(FILENAME)
Expand All @@ -25,6 +27,12 @@ def dd(mock_hope_azure_storage, mock_cv2dnn_storage, mock_hde_azure_storage, db)
return detector


@pytest.fixture
def mock_prototxt_file():
    """Return a ``mock_open`` stand-in whose read data is a one-line ``input_shape`` stanza."""
    return mock_open(read_data="input_shape { dim: 1 dim: 3 dim: 300 dim: 300 }")


@pytest.fixture
def mock_cv2dnn_storage():
storage = MagicMock(spec=CV2DNNStorage)
Expand Down Expand Up @@ -80,7 +88,7 @@ def mock_open_context_manager(image_bytes_io):

@pytest.fixture
def mock_net():
mock_net = MagicMock() # Mocking the neural network object
mock_net = MagicMock(spec=cv2.dnn_Net) # Mocking the neural network object
mock_detections = np.array(
[
[
Expand Down
37 changes: 32 additions & 5 deletions tests/faces/test_duplication_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import numpy as np
import pytest
from constance import config
from faces_const import FILENAME, FILENAMES
from faces_const import DEPLOY_PROTO_SHAPE, FILENAME, FILENAMES

from hope_dedup_engine.apps.faces.utils.duplication_detector import DuplicationDetector

Expand All @@ -19,12 +19,38 @@ def test_duplication_detector_initialization(dd):
assert dd.distance_threshold == config.DISTANCE_THRESHOLD
assert dd.filename == FILENAME
assert dd.encodings_filename == f"{FILENAME}.npy"
assert dd.scale_factor == config.SCALE_FACTOR
assert dd.mean_values == tuple(map(float, config.MEAN_VALUES.split(", ")))
assert dd.face_detection_model == config.FACE_DETECTION_MODEL
assert dd.nms_threshold == config.NMS_THRESHOLD
assert dd.shape == DEPLOY_PROTO_SHAPE


def test_get_shape(dd, mock_prototxt_file):
    """``_get_shape()`` must return DEPLOY_PROTO_SHAPE while ``open`` serves the mocked prototxt."""
    with patch("builtins.open", mock_prototxt_file):
        assert dd._get_shape() == DEPLOY_PROTO_SHAPE


def test_set_net(dd, mock_cv2dnn_storage, mock_net):
    """``_set_net()`` must read the Caffe network once and set its backend and target from config."""
    net_double, *_ = mock_net
    with patch("cv2.dnn.readNetFromCaffe", return_value=net_double) as read_net_mock:
        returned_net = dd._set_net(mock_cv2dnn_storage)
        # The loader is expected to receive the storage paths of the prototxt and the caffemodel.
        read_net_mock.assert_called_once_with(
            mock_cv2dnn_storage.path(settings.PROTOTXT_FILE),
            mock_cv2dnn_storage.path(settings.CAFFEMODEL_FILE),
        )

    assert returned_net == net_double
    net_double.setPreferableBackend.assert_called_once_with(int(config.DNN_BACKEND))
    net_double.setPreferableTarget.assert_called_once_with(int(config.DNN_TARGET))

    for storage_name, storage in dd.storages.items():
        assert isinstance(storage, MagicMock)
        if storage_name != "cv2dnn":
            continue
        # Only the cv2dnn storage is checked for existence and path lookups of both model files.
        for model_file in (settings.PROTOTXT_FILE, settings.CAFFEMODEL_FILE):
            storage.exists.assert_any_call(model_file)
        for model_file in (settings.CAFFEMODEL_FILE, settings.PROTOTXT_FILE):
            storage.path.assert_any_call(model_file)


Expand Down Expand Up @@ -63,10 +89,8 @@ def test_get_face_detections_dnn_with_detections(dd, mock_net, mock_open_context
patch.object(dd.storages["images"], "open", return_value=mock_open_context_manager),
patch("cv2.imdecode", imdecode),
patch("cv2.resize", resize),
patch.object(dd, "net", net),
):

dd.net.setInput(blob)
dd.net = net
face_regions = dd._get_face_detections_dnn()

assert face_regions == expected_regions
Expand Down Expand Up @@ -201,4 +225,7 @@ def test_find_duplicates_exception_handling(dd):
except Exception:
...
dd.logger.exception.assert_called_once()
args, kwargs = dd.logger.exception.call_args
assert args[0] == f"Error finding duplicates for image {dd.filename}"
assert isinstance(kwargs["exc_info"], Exception)
dd.logger.reset_mock()
41 changes: 41 additions & 0 deletions tests/faces/test_validators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from django.forms import ValidationError

import pytest

from hope_dedup_engine.apps.faces.validators import MeanValuesTupleField


def test_to_python_valid_tuple():
    """A well-formed "a, b, c" string converts to a tuple of three floats."""
    parsed = MeanValuesTupleField().to_python("104.0, 177.0, 123.0")
    assert parsed == (104.0, 177.0, 123.0)


def test_to_python_invalid_length():
    """A string holding only two values must be rejected with the format error message."""
    expected_message = "Enter a valid tuple of three float values separated by commas and spaces"
    mean_values_field = MeanValuesTupleField()
    with pytest.raises(ValidationError) as raised:
        mean_values_field.to_python("104.0, 177.0")
    assert expected_message in str(raised.value)


def test_to_python_value_out_of_range():
    """A component of 256.0 must be rejected with the range error message."""
    expected_message = "Each value must be between -255 and 255."
    mean_values_field = MeanValuesTupleField()
    with pytest.raises(ValidationError) as raised:
        mean_values_field.to_python("104.0, 177.0, 256.0")
    assert expected_message in str(raised.value)


def test_to_python_non_numeric_value():
    """A non-numeric component ("abc") must be rejected with the format error message."""
    expected_message = "Enter a valid tuple of three float values separated by commas and spaces"
    mean_values_field = MeanValuesTupleField()
    with pytest.raises(ValidationError) as raised:
        mean_values_field.to_python("104.0, abc, 123.0")
    assert expected_message in str(raised.value)


def test_prepare_value_with_tuple():
    """A tuple of three floats is rendered as a comma-and-space separated string."""
    rendered = MeanValuesTupleField().prepare_value((104.0, 177.0, 123.0))
    assert rendered == "104.0, 177.0, 123.0"


def test_prepare_value_with_string():
    """A value that is already a string comes back from ``prepare_value`` unchanged."""
    rendered = MeanValuesTupleField().prepare_value("104.0, 177.0, 123.0")
    assert rendered == "104.0, 177.0, 123.0"

0 comments on commit d594a93

Please sign in to comment.