Skip to content

Commit

Permalink
Rewrite to avoid dependency on OpenCV
Browse files Browse the repository at this point in the history
  • Loading branch information
FWDekker committed Jan 3, 2023
1 parent 8ea8d81 commit 3279fe8
Show file tree
Hide file tree
Showing 11 changed files with 89 additions and 92 deletions.
15 changes: 6 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,14 @@ compiles these frames into a timelapse.

## Installation
### Windows
1. [Install Visual C++ Redistributable for Visual Studio 2015 .](https://www.microsoft.com/en-us/download/details.aspx?id=48145)
2. [Install the Media Feature Pack.](https://support.microsoft.com/en-us/topic/media-feature-pack-list-for-windows-n-editions-c1c6fffa-d052-8338-7a79-a4bb980a700a)
3. Restart your computer.
4. [Download FFmpeg.](https://www.gyan.dev/ffmpeg/builds/ffmpeg-release-essentials.zip)
1. [Download FFmpeg.](https://www.gyan.dev/ffmpeg/builds/ffmpeg-release-essentials.zip)
(If you have [7-Zip](https://www.7-zip.org/) installed,
[download the `.7z` archive instead](https://www.gyan.dev/ffmpeg/builds/ffmpeg-release-essentials.7z).)
5. Unzip the downloaded archive into a new directory.
6. [Download the latest version of Facemation for Windows.](https://github.com/FWDekker/facemation/releases/latest)
7. Unzip the downloaded archive into a new directory.
8. Enter the directory where you unzipped FFmpeg, enter the `bin` directory, and copy the file `ffmpeg.exe` to the
directory where you unzipped Facemation.
2. Unzip the downloaded archive into a new directory.
3. [Download the latest version of Facemation for Windows.](https://github.com/FWDekker/facemation/releases/latest)
4. Unzip the downloaded archive into another new directory.
5. Enter the directory where you unzipped FFmpeg, enter the `bin` directory, and copy the file `ffmpeg.exe` to the
directory where you unzipped Facemation.
You should now have `facemation.exe` and `ffmpeg.exe` in the same directory.

### Linux
Expand Down
4 changes: 2 additions & 2 deletions build_linux.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,9 @@ python -m pip install -r requirements.txt

mkdir dist/
mkdir dist/input/
cp README.md dist/README.md
cp README.md dist/README.txt
cp src/main/resources/config_empty.py dist/config.py
pyinstaller -y -F --add-data="src/main/resources/*:." src/main/python/facemation.py
pip-licenses --with-license-file --no-license-path --output-file=dist/THIRD_PARTY_LICENSES
pip-licenses --with-license-file --no-license-path --output-file=dist/THIRD_PARTY_LICENSES.txt

python -m zipfile -c "facemation-linux-$(cat version).zip" dist/*
4 changes: 2 additions & 2 deletions build_windows.ps1
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ python -m pip install -r requirements.txt

mkdir dist/
mkdir dist/input/
cp README.md dist/README.md
cp README.md dist/README.txt
cp src/main/resources/config_empty.py dist/config.py
pyinstaller -y -F --add-data="src/main/resources/*;." src/main/python/facemation.py
pip-licenses --with-license-file --no-license-path --output-file=dist/THIRD_PARTY_LICENSES
pip-licenses --with-license-file --no-license-path --output-file=dist/THIRD_PARTY_LICENSES.txt

python -m zipfile -c "facemation-windows-$(cat version).zip" $(Resolve-Path -Relative "dist/*")
3 changes: 1 addition & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@ dlib==19.24.0
mergedeep==1.3.4
natsort==8.2.0
numpy==1.24.1
opencv-python==4.5.5.64
Pillow==9.3.0
Pillow==9.4.0
pip-licenses==4.0.3
pyinstaller==5.7.0
tqdm==4.64.1
16 changes: 9 additions & 7 deletions src/main/python/Cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,18 @@
from pathlib import Path
from typing import TypeVar, Generic, List

import cv2
import numpy as np
from PIL import Image

import Files
from ImageLoader import load_image

"""The type of data stored in a cache."""
T = TypeVar("T")


# TODO: Keep track of `state`s in an index file, to prevent overly long filenames
# TODO: Add some kind of versioning, in case code makes a breaking change w.r.t. users' existing caches
class Cache(ABC, Generic[T]):
"""
Stores data identified by a key, associated with a state that identifies the contents of the datum.
Expand Down Expand Up @@ -135,16 +137,16 @@ def _read_data(self, path: Path) -> T:
pass


class ImageCache(Cache[np.ndarray]):
class ImageCache(Cache[Image]):
"""
Caches images from the CV2 library.
Caches images from the Pillow library.
"""

def _write_data(self, path: Path, data: np.ndarray) -> None:
cv2.imwrite(str(path), data)
def _write_data(self, path: Path, data: Image) -> None:
data.save(path)

def _read_data(self, path: Path) -> np.ndarray:
return cv2.imread(str(path))
def _read_data(self, path: Path) -> Image:
return load_image(path)


class NdarrayCache(Cache[np.ndarray]):
Expand Down
22 changes: 22 additions & 0 deletions src/main/python/ImageLoader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import warnings
from pathlib import Path

from PIL import Image, ImageOps


def load_image(path: Path) -> Image.Image:
    """
    Loads the image at [path], rotated according to its EXIF orientation if necessary, without throwing annoying
    warnings while doing so.

    The file at [path] is only kept open for the duration of this call; the returned image is an independent copy.

    :param path: the path to the image to load
    :return: a copy of the image at [path], rotated if necessary
    """

    # `Image.open` is lazy and holds on to the file; the context manager makes sure the handle is released instead of
    # lingering until garbage collection.
    with Image.open(path) as img:
        with warnings.catch_warnings():
            # Only silence warnings emitted while transposing, as before
            warnings.simplefilter("ignore")
            # `exif_transpose` hands back a new image, so the result stays usable after `img` is closed
            return ImageOps.exif_transpose(img)

53 changes: 17 additions & 36 deletions src/main/python/stages/CaptionStage.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
import copy
import math
import sys
from pathlib import Path
from typing import Callable, Any, Dict
from typing import Callable, Dict

import cv2
import numpy as np
from PIL import Image
from PIL import ImageDraw, ImageFont
from tqdm import tqdm

import Hasher
import Resolver
from Cache import ImageCache
from ImageLoader import load_image
from Pipeline import ImageInfo, ProcessingStage


Expand All @@ -20,14 +19,14 @@ class CaptionStage(ProcessingStage):
"""

captioned_cache: ImageCache
caption_generator: Callable[[str, Any], str]
caption_generator: Callable[[str], str]

def __init__(self, cache_dir: str, caption_generator: Callable[[str, Image], str]):
def __init__(self, cache_dir: str, caption_generator: Callable[[str], str]):
"""
Constructs a new `CaptionStage`.
:param cache_dir: the directory to cache captioned images in
:param caption_generator: generates a caption based on the filename and PIL `Image` object
:param caption_generator: generates a caption based on the filename
"""

self.captioned_cache = ImageCache(cache_dir, "captioned", ".jpg")
Expand All @@ -44,42 +43,24 @@ def process(self, imgs: Dict[Path, ImageInfo]) -> Dict[Path, ImageInfo]:
processed_imgs = copy.deepcopy(imgs)

for img_path, img_data in tqdm(imgs.items(), desc="Adding captions", file=sys.stdout):
caption = self.caption_generator(img_path.name, Image.open(img_path))
caption = self.caption_generator(img_path.name)

processed_img_hash = Hasher.hash_file(img_data["processed_path"])
state_hash = Hasher.hash_string(f"{processed_img_hash}-{caption}")
if self.captioned_cache.has(img_data["hash"], state_hash):
processed_imgs[img_path]["processed_path"] = self.captioned_cache.path(img_data["hash"], state_hash)
continue

img = cv2.imread(str(img_data["processed_path"]))
img = write_on_image(img, caption, (0.05, 0.95), 0.05)

processed_imgs[img_path]["processed_path"] = self.captioned_cache.cache(img, img_data["hash"], state_hash)

return processed_imgs
img = load_image(img_data["processed_path"])

width, height = img.size
pos = (0.05 * width, 0.90 * height)
font = ImageFont.truetype(str(Resolver.resource_path("Roboto-Regular.ttf")), int(0.05 * height))

def write_on_image(image: np.ndarray, text: str, pos: [float, float], text_height: float) -> np.ndarray:
"""
Writes [text] on [image] at coordinates [pos] with a height of [text_height].
img_draw = ImageDraw.Draw(img)
img_draw.text(pos, caption, font=font, stroke_fill=(0, 0, 0), stroke_width=8)
img_draw.text(pos, caption, font=font, stroke_fill=(255, 255, 255), stroke_width=1)

:param image: the image to write text on; this image is not modified
:param text: the text to write onto [image]
:param pos: the coordinates to place the text at, as a ratio of the size of [image]
:param text_height: the height of the text, as a ratio of the height of [image]
:return: a copy of [image] with text written on it
"""
processed_imgs[img_path]["processed_path"] = self.captioned_cache.cache(img, img_data["hash"], state_hash)

height, width = image.shape[:2]
text_scale = cv2.getTextSize(text, fontFace=cv2.FONT_HERSHEY_COMPLEX, fontScale=1, thickness=32)
text_scale = text_height / (text_scale[0][1] / height)
text_pos = math.floor(pos[0] * width), math.floor(pos[1] * height)

image = cv2.putText(image, text, text_pos,
fontFace=cv2.FONT_HERSHEY_COMPLEX, fontScale=text_scale,
color=(0, 0, 0), thickness=32, lineType=cv2.LINE_AA)
image = cv2.putText(image, text, text_pos,
fontFace=cv2.FONT_HERSHEY_COMPLEX, fontScale=text_scale,
color=(255, 255, 255), thickness=16, lineType=cv2.LINE_AA)
return image
return processed_imgs
28 changes: 16 additions & 12 deletions src/main/python/stages/FindFacesStage.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@
from pathlib import Path
from typing import Dict, Tuple, Callable

import cv2
import dlib
import numpy as np
from PIL import ImageDraw
from tqdm.contrib.concurrent import process_map

import Files
import Resolver
from Cache import NdarrayCache
from ImageLoader import load_image
from Pipeline import PreprocessingStage, ImageInfo
from UserException import UserException

Expand Down Expand Up @@ -70,21 +71,21 @@ def preprocess(self, imgs: Dict[Path, ImageInfo]) -> Dict[Path, ImageInfo]:
file=sys.stdout))


def find_face(img: Tuple[Path, ImageInfo], face_cache: NdarrayCache, error_dir: str) -> Tuple[Path, ImageInfo]:
def find_face(img_tuple: Tuple[Path, ImageInfo], face_cache: NdarrayCache, error_dir: str) -> Tuple[Path, ImageInfo]:
"""
Finds the face in [img], expressed as the positions of the eyes, caching the face data in [face_cache].
Finds the face in [img_tuple], expressed as the positions of the eyes, caching the face data in [face_cache].
Raises a [UserException] if no or multiple faces are found in an image, and [g_face_selection_overrides] is not
configured for this image. Additionally, if an exception is thrown, the image is written to [error_dir] with
visualized debugging information.
:param img: the original input path and the pre-processing data of the image to find a face in
:param img_tuple: the original input path and the pre-processing data of the image to find a face in
:param face_cache: the cache to store the found face in
:param error_dir: the directory to write debugging information in to assist the user
:return: the original input path and the found face
"""

img_path, img_data = img
img_path, img_data = img_tuple
if face_cache.has(img_data["hash"]):
return img_path, {"eyes": face_cache.load(img_data["hash"])}

Expand All @@ -94,13 +95,13 @@ def find_face(img: Tuple[Path, ImageInfo], face_cache: NdarrayCache, error_dir:
shape_predictor = dlib.shape_predictor(str(Resolver.resource_path("shape_predictor_5_face_landmarks.dat")))

# Find face
img = cv2.imread(str(img_path))
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

img = load_image(img_path)
# noinspection PyTypeChecker
img_np = np.array(img)
faces = dlib.full_object_detections()
detections = detector(img_rgb, 1)
detections = detector(img_np, 1)
for detection in detections:
faces.append(shape_predictor(img_rgb, detection))
faces.append(shape_predictor(img_np, detection))

# Determine what to do if there are multiple faces
if len(faces) == 0:
Expand All @@ -113,11 +114,14 @@ def find_face(img: Tuple[Path, ImageInfo], face_cache: NdarrayCache, error_dir:
if img_name in g_face_selection_overrides:
face = sorted(list(faces), key=g_face_selection_overrides[img_name])[0]
else:
img_draw = ImageDraw.Draw(img)

bb = [it.rect for it in faces]
bb = [((it.left(), it.top()), (it.right(), it.bottom())) for it in bb]
for it in bb:
img = cv2.rectangle(img, it[0], it[1], (255, 0, 0), 5)
cv2.imwrite(f"{error_dir}/{img_name}", img)
# `it` is ((left, top), (right, bottom)); Pillow expects the whole box as `xy`, with styling passed by keyword
img_draw.rectangle(it, outline=(255, 0, 0), width=5)

img.save(f"{error_dir}/{img_name}")

raise UserException(f"Too many faces: Found {len(faces)} in '{img_path}'. "
f"The image has been stored in '{error_dir}' with squares drawn around all faces that "
Expand Down
30 changes: 12 additions & 18 deletions src/main/python/stages/NormalizeStage.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,13 @@
from pathlib import Path
from typing import Dict

import cv2
import numpy as np
from PIL import Image
from tqdm import tqdm

import Hasher
from Cache import ImageCache
from ImageLoader import load_image
from Pipeline import ProcessingStage, ImageInfo
from UserException import UserException

Expand Down Expand Up @@ -83,30 +84,23 @@ def process(self, imgs: Dict[Path, ImageInfo]) -> Dict[Path, ImageInfo]:
processed_imgs[img_path]["processed_path"] = self.normalized_cache.path(img_data["hash"], state_hash)
continue

# Read image
img = cv2.imread(str(img_data["processed_path"]))

# Resize
img = cv2.resize(img, scaled_img_dims[img_path])

# Translate
translation = np.float32([[1, 0, translations[img_path][0]], [0, 1, translations[img_path][1]]])
img = cv2.warpAffine(img, translation, scaled_img_dims[img_path] + translations[img_path])

# Rotate
# Validate normalization parameters
if math.fabs(math.degrees(angles[img_path])) >= 45.0:
raise UserException(f"Image '{img_path}' is rotated by {math.degrees(angles[img_path])}, but "
f"Facemation only supports angles up to 45 degrees (but preferably much lower). "
f"You should manually rotate the image and crop out the relevant parts, or remove "
f"the image from the inputs altogether.")

rotation = cv2.getRotationMatrix2D(max_scaled_eye_center.astype(float),
-math.degrees(angles[img_path]),
1.0)
img = cv2.warpAffine(img, rotation, img.shape[1::-1], flags=cv2.INTER_LINEAR)
# Normalize image
translated_dims = tuple(scaled_img_dims[img_path] + translations[img_path])
translation_matrix = (1, 0, -translations[img_path][0], 0, 1, -translations[img_path][1])

# Crop
img = img[min_inner_boundaries[1]:min_inner_boundaries[3], min_inner_boundaries[0]:min_inner_boundaries[2]]
img = load_image(img_data["processed_path"])
img = img.resize(scaled_img_dims[img_path])
img = img.transform(translated_dims, Image.AFFINE, translation_matrix)
img = img.rotate(-math.degrees(angles[img_path]), center=tuple(max_scaled_eye_center))
img = img.crop((min_inner_boundaries[0], min_inner_boundaries[1],
min_inner_boundaries[2], min_inner_boundaries[3]))

# Store normalized image
processed_imgs[img_path]["processed_path"] = self.normalized_cache.cache(img, img_data["hash"], state_hash)
Expand Down
Binary file added src/main/resources/Roboto-Regular.ttf
Binary file not shown.
6 changes: 2 additions & 4 deletions src/main/resources/config_default.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,8 @@
"caption": {
# Set to `True` to add a caption to each frame.
"enabled": False,
# Given the image's filename and the `Image` object from the PIL library, generates the string caption to add to
# the image. See https://pillow.readthedocs.io/en/stable/reference/Image.html for more information on the
# `Image` object.
"generator": (lambda filename, image: f"{filename}: {image.size}"),
# Generates the caption for the image using the image's filename (including extension).
"generator": (lambda filename: filename),
},

# Combine the processed images into a video.
Expand Down

0 comments on commit 3279fe8

Please sign in to comment.