Rewrite to avoid dependency on OpenCV

FWDekker · Jan 3, 2023 · 3279fe8 · 3279fe8
1 parent 8ea8d81
commit 3279fe8
Show file tree

Hide file tree

Showing 11 changed files with 89 additions and 92 deletions.
diff --git a/README.md b/README.md
@@ -6,17 +6,14 @@ compiles these frames into a timelapse.
 
 ## Installation
 ### Windows
-1. [Install  Visual C++ Redistributable for Visual Studio 2015 .](https://www.microsoft.com/en-us/download/details.aspx?id=48145)
-2. [Install the Media Feature Pack.](https://support.microsoft.com/en-us/topic/media-feature-pack-list-for-windows-n-editions-c1c6fffa-d052-8338-7a79-a4bb980a700a)
-3. Restart your computer.
-4. [Download FFmpeg.](https://www.gyan.dev/ffmpeg/builds/ffmpeg-release-essentials.zip)
+1. [Download FFmpeg.](https://www.gyan.dev/ffmpeg/builds/ffmpeg-release-essentials.zip)
    (If you have [7-Zip](https://www.7-zip.org/) installed,
    [download the `.7z` archive instead](https://www.gyan.dev/ffmpeg/builds/ffmpeg-git-essentials.7z).)
-5. Unzip the downloaded archive into a new directory.
-6. [Download the latest version of Facemation for Windows.](https://github.com/FWDekker/facemation/releases/latest)
-7. Unzip the downloaded archive into a new directory.
-8. Enter the directory where you unzipped FFmpeg, enter the `bin` directory, and copy the file `ffmpeg.exe` to the
-   directory where you unzipped Facemation.
+2. Unzip the downloaded archive into a new directory.
+3. [Download the latest version of Facemation for Windows.](https://github.com/FWDekker/facemation/releases/latest)
+4. Unzip the downloaded archive into another new directory.
+5. Enter the directory where you unzipped FFmpeg, enter the `bin` directory, and copy the file `ffmpeg.exe` to the
+   directory where you unzipped Facemation.  
    You should now have `facemation.exe` and `ffmpeg.exe` in the same directory.
 
 ### Linux

diff --git a/build_linux.sh b/build_linux.sh
@@ -7,9 +7,9 @@ python -m pip install -r requirements.txt
 
 mkdir dist/
 mkdir dist/input/
-cp README.md dist/README.md
+cp README.md dist/README.txt
 cp src/main/resources/config_empty.py dist/config.py
 pyinstaller -y -F --add-data="src/main/resources/*:." src/main/python/facemation.py
-pip-licenses --with-license-file --no-license-path --output-file=dist/THIRD_PARTY_LICENSES
+pip-licenses --with-license-file --no-license-path --output-file=dist/THIRD_PARTY_LICENSES.txt
 
 python -m zipfile -c "facemation-linux-$(cat version).zip" dist/*
diff --git a/build_windows.ps1 b/build_windows.ps1
@@ -9,9 +9,9 @@ python -m pip install -r requirements.txt
 
 mkdir dist/
 mkdir dist/input/
-cp README.md dist/README.md
+cp README.md dist/README.txt
 cp src/main/resources/config_empty.py dist/config.py
 pyinstaller -y -F --add-data="src/main/resources/*;." src/main/python/facemation.py
-pip-licenses --with-license-file --no-license-path --output-file=dist/THIRD_PARTY_LICENSES
+pip-licenses --with-license-file --no-license-path --output-file=dist/THIRD_PARTY_LICENSES.txt
 
 python -m zipfile -c "facemation-windows-$(cat version).zip" $(Resolve-Path -Relative "dist/*")
diff --git a/requirements.txt b/requirements.txt
@@ -2,8 +2,7 @@ dlib==19.24.0
 mergedeep==1.3.4
 natsort==8.2.0
 numpy==1.24.1
-opencv-python==4.5.5.64
-Pillow==9.3.0
+Pillow==9.4.0
 pip-licenses==4.0.3
 pyinstaller==5.7.0
 tqdm==4.64.1
diff --git a/src/main/python/Cache.py b/src/main/python/Cache.py
@@ -4,16 +4,18 @@
 from pathlib import Path
 from typing import TypeVar, Generic, List
 
-import cv2
 import numpy as np
+from PIL import Image
 
 import Files
+from ImageLoader import load_image
 
 """The type of data stored in a cache."""
 T = TypeVar("T")
 
 
 # TODO: Keep track of `state`s in an index file, to prevent overly long filenames
+# TODO: Add some kind of versioning, in case code makes a breaking change w.r.t. users' existing caches
 class Cache(ABC, Generic[T]):
     """
     Stores data identified by a key, associated with a state that identifies the contents of the datum.
@@ -135,16 +137,16 @@ def _read_data(self, path: Path) -> T:
         pass
 
 
-class ImageCache(Cache[np.ndarray]):
+class ImageCache(Cache[Image]):
     """
-    Caches images from the CV2 library.
+    Caches images from the Pillow library.
     """
 
-    def _write_data(self, path: Path, data: np.ndarray) -> None:
-        cv2.imwrite(str(path), data)
+    def _write_data(self, path: Path, data: Image) -> None:
+        data.save(path)
 
-    def _read_data(self, path: Path) -> np.ndarray:
-        return cv2.imread(str(path))
+    def _read_data(self, path: Path) -> Image:
+        return load_image(path)
 
 
 class NdarrayCache(Cache[np.ndarray]):

diff --git a/src/main/python/ImageLoader.py b/src/main/python/ImageLoader.py
@@ -0,0 +1,22 @@
+import warnings
+from pathlib import Path
+
+from PIL import Image, ImageOps
+
+
+def load_image(path: Path) -> Image.Image:
+    """
+    Opens the image at [path] and applies its EXIF orientation, silencing warnings raised while transposing.
+
+    :param path: the path to the image to load
+    :return: the result of `ImageOps.exif_transpose` on the opened image
+    """
+    # Opening happens outside the `catch_warnings` block, so warnings from `Image.open` are not suppressed here.
+    img = Image.open(path)
+    # Only the EXIF transposition below runs with all warnings ignored.
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        img = ImageOps.exif_transpose(img)
+
+    return img
+
diff --git a/src/main/python/stages/CaptionStage.py b/src/main/python/stages/CaptionStage.py
@@ -1,16 +1,15 @@
 import copy
-import math
 import sys
 from pathlib import Path
-from typing import Callable, Any, Dict
+from typing import Callable, Dict
 
-import cv2
-import numpy as np
-from PIL import Image
+from PIL import ImageDraw, ImageFont
 from tqdm import tqdm
 
 import Hasher
+import Resolver
 from Cache import ImageCache
+from ImageLoader import load_image
 from Pipeline import ImageInfo, ProcessingStage
 
 
@@ -20,14 +19,14 @@ class CaptionStage(ProcessingStage):
     """
 
     captioned_cache: ImageCache
-    caption_generator: Callable[[str, Any], str]
+    caption_generator: Callable[[str], str]
 
-    def __init__(self, cache_dir: str, caption_generator: Callable[[str, Image], str]):
+    def __init__(self, cache_dir: str, caption_generator: Callable[[str], str]):
         """
         Constructs a new `CaptionStage`.
 
         :param cache_dir: the directory to cache captioned images in
-        :param caption_generator: generates a caption based on the filename and PIL `Image` object
+        :param caption_generator: generates a caption based on the filename
         """
 
         self.captioned_cache = ImageCache(cache_dir, "captioned", ".jpg")
@@ -44,42 +43,24 @@ def process(self, imgs: Dict[Path, ImageInfo]) -> Dict[Path, ImageInfo]:
         processed_imgs = copy.deepcopy(imgs)
 
         for img_path, img_data in tqdm(imgs.items(), desc="Adding captions", file=sys.stdout):
-            caption = self.caption_generator(img_path.name, Image.open(img_path))
+            caption = self.caption_generator(img_path.name)
 
             processed_img_hash = Hasher.hash_file(img_data["processed_path"])
             state_hash = Hasher.hash_string(f"{processed_img_hash}-{caption}")
             if self.captioned_cache.has(img_data["hash"], state_hash):
                 processed_imgs[img_path]["processed_path"] = self.captioned_cache.path(img_data["hash"], state_hash)
                 continue
 
-            img = cv2.imread(str(img_data["processed_path"]))
-            img = write_on_image(img, caption, (0.05, 0.95), 0.05)
-
-            processed_imgs[img_path]["processed_path"] = self.captioned_cache.cache(img, img_data["hash"], state_hash)
-
-        return processed_imgs
+            img = load_image(img_data["processed_path"])
 
+            width, height = img.size
+            pos = (0.05 * width, 0.90 * height)
+            font = ImageFont.truetype(str(Resolver.resource_path("Roboto-Regular.ttf")), int(0.05 * height))
 
-def write_on_image(image: np.ndarray, text: str, pos: [float, float], text_height: float) -> np.ndarray:
-    """
-    Writes [text] on [image] at coordinates [pos] with a height of [text_height].
+            img_draw = ImageDraw.Draw(img)
+            img_draw.text(pos, caption, font=font, stroke_fill=(0, 0, 0), stroke_width=8)
+            img_draw.text(pos, caption, font=font, stroke_fill=(255, 255, 255), stroke_width=1)
 
-    :param image: the image to write text on; this image is not modified
-    :param text: the text to write onto [image]
-    :param pos: the coordinates to place the text at, as a ratio of the size of [image]
-    :param text_height: the height of the text, as a ratio of the height of [image]
-    :return: a copy of [image] with text written on it
-    """
+            processed_imgs[img_path]["processed_path"] = self.captioned_cache.cache(img, img_data["hash"], state_hash)
 
-    height, width = image.shape[:2]
-    text_scale = cv2.getTextSize(text, fontFace=cv2.FONT_HERSHEY_COMPLEX, fontScale=1, thickness=32)
-    text_scale = text_height / (text_scale[0][1] / height)
-    text_pos = math.floor(pos[0] * width), math.floor(pos[1] * height)
-
-    image = cv2.putText(image, text, text_pos,
-                        fontFace=cv2.FONT_HERSHEY_COMPLEX, fontScale=text_scale,
-                        color=(0, 0, 0), thickness=32, lineType=cv2.LINE_AA)
-    image = cv2.putText(image, text, text_pos,
-                        fontFace=cv2.FONT_HERSHEY_COMPLEX, fontScale=text_scale,
-                        color=(255, 255, 255), thickness=16, lineType=cv2.LINE_AA)
-    return image
+        return processed_imgs
diff --git a/src/main/python/stages/FindFacesStage.py b/src/main/python/stages/FindFacesStage.py
@@ -3,14 +3,15 @@
 from pathlib import Path
 from typing import Dict, Tuple, Callable
 
-import cv2
 import dlib
 import numpy as np
+from PIL import ImageDraw
 from tqdm.contrib.concurrent import process_map
 
 import Files
 import Resolver
 from Cache import NdarrayCache
+from ImageLoader import load_image
 from Pipeline import PreprocessingStage, ImageInfo
 from UserException import UserException
 
@@ -70,21 +71,21 @@ def preprocess(self, imgs: Dict[Path, ImageInfo]) -> Dict[Path, ImageInfo]:
                                 file=sys.stdout))
 
 
-def find_face(img: Tuple[Path, ImageInfo], face_cache: NdarrayCache, error_dir: str) -> Tuple[Path, ImageInfo]:
+def find_face(img_tuple: Tuple[Path, ImageInfo], face_cache: NdarrayCache, error_dir: str) -> Tuple[Path, ImageInfo]:
     """
-    Finds the face in [img], expressed as the positions of the eyes, caching the face data in [face_cache].
+    Finds the face in [img_tuple], expressed as the positions of the eyes, caching the face data in [face_cache].
 
     Raises a [UserException] if no or multiple faces are found in an image, and [g_face_selection_overrides] is not
     configured for this image. Additionally, if an exception is thrown, the image is written to [error_dir] with
     visualized debugging information.
 
-    :param img: the original input path and the pre-processing data of the image to find a face in
+    :param img_tuple: the original input path and the pre-processing data of the image to find a face in
     :param face_cache: the cache to store the found face in
     :param error_dir: the directory to write debugging information in to assist the user
     :return: the original input path and the found face
     """
 
-    img_path, img_data = img
+    img_path, img_data = img_tuple
     if face_cache.has(img_data["hash"]):
         return img_path, {"eyes": face_cache.load(img_data["hash"])}
 
@@ -94,13 +95,13 @@ def find_face(img: Tuple[Path, ImageInfo], face_cache: NdarrayCache, error_dir:
     shape_predictor = dlib.shape_predictor(str(Resolver.resource_path("shape_predictor_5_face_landmarks.dat")))
 
     # Find face
-    img = cv2.imread(str(img_path))
-    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
-
+    img = load_image(img_path)
+    # noinspection PyTypeChecker
+    img_np = np.array(img)
     faces = dlib.full_object_detections()
-    detections = detector(img_rgb, 1)
+    detections = detector(img_np, 1)
     for detection in detections:
-        faces.append(shape_predictor(img_rgb, detection))
+        faces.append(shape_predictor(img_np, detection))
 
     # Determine what to do if there are multiple faces
     if len(faces) == 0:
@@ -113,11 +114,14 @@ def find_face(img: Tuple[Path, ImageInfo], face_cache: NdarrayCache, error_dir:
         if img_name in g_face_selection_overrides:
             face = sorted(list(faces), key=g_face_selection_overrides[img_name])[0]
         else:
+            img_draw = ImageDraw.Draw(img)
+
             bb = [it.rect for it in faces]
             bb = [((it.left(), it.top()), (it.right(), it.bottom())) for it in bb]
             for it in bb:
-                img = cv2.rectangle(img, it[0], it[1], (255, 0, 0), 5)
-            cv2.imwrite(f"{error_dir}/{img_name}", img)
+                img_draw.rectangle(it[0], it[1], (255, 0, 0), 5)
+
+            img.save(f"{error_dir}/{img_name}")
 
             raise UserException(f"Too many faces: Found {len(faces)} in '{img_path}'. "
                                 f"The image has been stored in '{error_dir}' with squares drawn around all faces that "

diff --git a/src/main/python/stages/NormalizeStage.py b/src/main/python/stages/NormalizeStage.py
@@ -4,12 +4,13 @@
 from pathlib import Path
 from typing import Dict
 
-import cv2
 import numpy as np
+from PIL import Image
 from tqdm import tqdm
 
 import Hasher
 from Cache import ImageCache
+from ImageLoader import load_image
 from Pipeline import ProcessingStage, ImageInfo
 from UserException import UserException
 
@@ -83,30 +84,23 @@ def process(self, imgs: Dict[Path, ImageInfo]) -> Dict[Path, ImageInfo]:
                 processed_imgs[img_path]["processed_path"] = self.normalized_cache.path(img_data["hash"], state_hash)
                 continue
 
-            # Read image
-            img = cv2.imread(str(img_data["processed_path"]))
-
-            # Resize
-            img = cv2.resize(img, scaled_img_dims[img_path])
-
-            # Translate
-            translation = np.float32([[1, 0, translations[img_path][0]], [0, 1, translations[img_path][1]]])
-            img = cv2.warpAffine(img, translation, scaled_img_dims[img_path] + translations[img_path])
-
-            # Rotate
+            # Validate normalization parameters
             if math.fabs(math.degrees(angles[img_path])) >= 45.0:
                 raise UserException(f"Image '{img_path}' is rotated by {math.degrees(angles[img_path])}, but "
                                     f"Facemation only supports angles up to 45 degrees (but preferably much lower). "
                                     f"You should manually rotate the image and crop out the relevant parts, or remove "
                                     f"the image from the inputs altogether.")
 
-            rotation = cv2.getRotationMatrix2D(max_scaled_eye_center.astype(float),
-                                               -math.degrees(angles[img_path]),
-                                               1.0)
-            img = cv2.warpAffine(img, rotation, img.shape[1::-1], flags=cv2.INTER_LINEAR)
+            # Normalize image
+            translated_dims = tuple(scaled_img_dims[img_path] + translations[img_path])
+            translation_matrix = (1, 0, -translations[img_path][0], 0, 1, -translations[img_path][1])
 
-            # Crop
-            img = img[min_inner_boundaries[1]:min_inner_boundaries[3], min_inner_boundaries[0]:min_inner_boundaries[2]]
+            img = load_image(img_data["processed_path"])
+            img = img.resize(scaled_img_dims[img_path])
+            img = img.transform(translated_dims, Image.AFFINE, translation_matrix)
+            img = img.rotate(-math.degrees(angles[img_path]), center=tuple(max_scaled_eye_center))
+            img = img.crop((min_inner_boundaries[0], min_inner_boundaries[1],
+                            min_inner_boundaries[2], min_inner_boundaries[3]))
 
             # Store normalized image
             processed_imgs[img_path]["processed_path"] = self.normalized_cache.cache(img, img_data["hash"], state_hash)

diff --git a/src/main/resources/Roboto-Regular.ttf b/src/main/resources/Roboto-Regular.ttf
diff --git a/src/main/resources/config_default.py b/src/main/resources/config_default.py
@@ -44,10 +44,8 @@
     "caption": {
         # Set to `True` to add a caption to each frame.
         "enabled": False,
-        # Given the image's filename and the `Image` object from the PIL library, generates the string caption to add to
-        # the image. See https://pillow.readthedocs.io/en/stable/reference/Image.html for more information on the
-        # `Image` object.
-        "generator": (lambda filename, image: f"{filename}: {image.size}"),
+        # Generates the caption for the image using the image's filename (including extension).
+        "generator": (lambda filename: filename),
     },
 
     # Combine the processed images into a video.