Skip to content

Commit

Permalink
Speed up Image Enumeration (#17)
Browse files Browse the repository at this point in the history
* feat: speed up dataset enumeration

Rather than attempting to load every image, by default we now
check whether the file extension is known to Pillow, the image
library. The enhanced validation (which checks that each image can
actually be loaded) can still be used by passing
enhanced_validation=True to `enumerate_images()`.

* style: lint fixes

* style: readability
  • Loading branch information
bencevans authored Aug 16, 2022
1 parent ec5de04 commit af5b1c7
Showing 1 changed file with 19 additions and 4 deletions.
23 changes: 19 additions & 4 deletions camtrapml/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
Dataset Handling
"""

from typing import Union
from pathlib import Path
from os import walk
from typing import Union
from PIL import Image
from camtrapml.image.utils import is_image


Expand All @@ -25,14 +26,28 @@ def __init__(self, path: Path, name: Union[None, str] = None):
if not self.path.exists() or not self.path.is_dir():
raise ValueError(f"{self.path} is not a directory")

def enumerate_images(self):
def enumerate_images(self, enhanced_validation: bool = False):
"""
Enumerates all images in the dataset.
"""

supported_extensions = {
extention.lower() for extention, image_format in
Image.registered_extensions().items()
if image_format in Image.OPEN
}

for root, _, files in walk(self.path):
for file in files:
if is_image(Path(root) / file):
yield Path(root) / file
file_path = Path(root) / file

if enhanced_validation:
if is_image(file_path):
yield file_path

else:
if file_path.suffix.lower() in supported_extensions:
yield file_path

@staticmethod
def from_coco(source):
Expand Down

0 comments on commit af5b1c7

Please sign in to comment.